[F] Fix strange ratios for out-of-bound values

2026-04-25 07:53:08 -05:00 · 2022-04-05 01:18:40 -04:00 · 2022-04-05 01:18:40 -04:00 · 7481279c27
commit 7481279c27
parent 64aba4143c
3 changed files with 16 additions and 1 deletions
--- a/sgs/init.py
+++ b/sgs/init.py
@ -1,4 +1,4 @@
 import sgs.api
 import sgs.calculations

-__version__ = '1.0.5'
+__version__ = '1.0.6'
--- a/sgs/api.py
+++ b/sgs/api.py
@ -16,6 +16,7 @@ Gender = Literal['f', 'm']


 _kde_functions: dict[Feature, dict[Gender, gaussian_kde]] = {}
+_kde_boundaries: dict[Feature, tuple[float, float]] = {}


 def load_kde() -> dict[Feature, dict[Gender, gaussian_kde]]:
@ -34,12 +35,17 @@ def load_kde() -> dict[Feature, dict[Gender, gaussian_kde]]:
    data = {k.lower(): data[k] for k in data}

    # Fit KDE functions
+    # Also store boundaries per feature: (1st percentile of 'm', 99th percentile of 'f')
    for feature in data:
        _kde_functions[feature] = {}
        for gender in data[feature]:
            kde = gaussian_kde(data[feature][gender], 'scott')
            _kde_functions[feature][gender] = kde

+        # Boundaries
+        _kde_boundaries[feature] = (np.percentile(data[feature]['m'], 1),
+                                    np.percentile(data[feature]['f'], 99))
+
    return _kde_functions


@ -63,6 +69,14 @@ def _calculate_fem_prob(feature: Feature, value: float) -> float:
    """
    f = load_kde()[feature]['f'].evaluate([value])[0]
    m = load_kde()[feature]['m'].evaluate([value])[0]
+
+    # Out-of-bound values: return 1 above f99 and 0 below m1 instead of the ratio
+    m1, f99 = _kde_boundaries[feature]
+    if value > f99:
+        return 1
+    if value < m1:
+        return 0
+
    return f / (f + m)


--- a/test.py
+++ b/test.py
@ -2,3 +2,4 @@ import sgs

 if __name__ == '__main__':
    print(sgs.api._calculate_fem_prob('pitch', 200))
+    print(sgs.api._calculate_fem_prob('f1', 741))