Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-and-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- name: Test
id: test
run: |
pip install .
pip install .[dev]
python -m unittest discover tests/

- name: Deploy
Expand Down
13 changes: 11 additions & 2 deletions numerai_tools/scoring.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
import os
import functools
from typing import List, Tuple, Union, Optional

import numpy as np
import pandas as pd
from scipy import stats

try:
if os.environ.get("DISABLE_NUMBA_STATS"):
raise ImportError
from numba_stats import norm
except ImportError:
from scipy.stats import norm

from sklearn.preprocessing import OneHotEncoder


Expand Down Expand Up @@ -171,7 +180,7 @@ def gaussian(df: pd.DataFrame) -> pd.DataFrame:
pd.DataFrame - the gaussianized data
"""
assert np.array_equal(df.index.sort_values(), df.index)
return df.apply(lambda series: stats.norm.ppf(series))
return df.apply(functools.partial(norm.ppf, loc=0, scale=1))


def orthogonalize(v: np.ndarray, u: np.ndarray) -> np.ndarray:
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,7 @@ def load(path):
"scipy~=1.11.4",
"scikit-learn>=1.3.0",
],
extras_require={
"dev": ["numba-stats>=1.7.0"]
},
)
27 changes: 20 additions & 7 deletions tests/test_scoring.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from unittest.mock import patch

import numpy as np
import pandas as pd
Expand All @@ -21,6 +22,9 @@
stake_weight,
)

from numba_stats import norm as numba_stats_norm
from scipy.stats import norm as scipy_stats_norm


class TestScoring(unittest.TestCase):
def setUp(self):
Expand All @@ -34,6 +38,11 @@ def setUp(self):
self.pos_neg = pd.Series([0, -0, 0.5, -0.5, 1.0, -1.0, 2.0, -2.0]).rename(
"pos_neg"
)
self.s = [x/4 for x in range(5)]
self.df = pd.DataFrame({
"target": self.s,
"prediction": reversed(self.s)
})

def test_correlation(self):
assert np.isclose(correlation(self.up, self.up), 1)
Expand Down Expand Up @@ -201,10 +210,14 @@ def test_neutralize(self):
).all()

def test_numerai_corr_doesnt_clobber_targets(self):
s = [x/4 for x in range(5)]
df = pd.DataFrame({
"target": s,
"prediction": reversed(s)
})
numerai_corr(df[["prediction"]], df["target"])
assert pd.Series(s).equals(df["target"]), f"{s} != {list(df['target'].values)}"
numerai_corr(self.df[["prediction"]], self.df["target"])
assert pd.Series(self.s).equals(self.df["target"]), \
f"{self.s} != {list(self.df['target'].values)}"

def test_numerai_corr_is_same_with_scipy_and_numba(self):
    """Check numerai_corr agrees between the scipy and numba-stats ``norm``.

    ``numerai_tools.scoring`` binds a module-level ``norm`` name to either
    ``numba_stats.norm`` or ``scipy.stats.norm`` at import time. This test
    patches that name with each implementation in turn and verifies that
    the resulting correlations are numerically close.
    """
    # Score once with scipy's normal distribution patched in.
    with patch("numerai_tools.scoring.norm", new=scipy_stats_norm):
        corr1 = numerai_corr(self.df[["prediction"]], self.df["target"])
    # Score again on the same data with the numba-stats implementation.
    with patch("numerai_tools.scoring.norm", new=numba_stats_norm):
        corr2 = numerai_corr(self.df[["prediction"]], self.df["target"])
    # np.isclose is element-wise; reduce explicitly so the assertion does
    # not rely on the truthiness of an array-valued result.
    assert np.isclose(corr1, corr2).all(), f"{corr1} != {corr2}"