18 changes: 14 additions & 4 deletions numerai_tools/scoring.py
@@ -5,7 +5,6 @@
from scipy import stats
from sklearn.preprocessing import OneHotEncoder
-

# sometimes when we match up the target/prediction indices,
# changes in the stock universe cause some stocks to enter or leave;
# this ensures we don't filter too much
@@ -163,9 +162,20 @@ def power(df: pd.DataFrame, p: float) -> pd.DataFrame:
"""
assert not df.isna().any().any(), "Data contains NaNs"
assert np.array_equal(df.index.sort_values(), df.index), "Index is not sorted"
result = np.sign(df) * np.abs(df) ** p
assert ((result.std() == 0) | (result.corrwith(df) >= 0.9)).all()
return result
+    result = np.sign(df.values) * np.abs(df.values) ** p
+    assert (
+        (result.std(axis=0) == 0)
+        | (
+            np.array(
+                [
+                    np.corrcoef(result[:, i], df.values[:, i])[0, 1]
+                    for i in range(result.shape[1])
+                ]
+            )
+            > 0.9
+        )
+    ).all()
+    return pd.DataFrame(result, index=df.index, columns=df.columns)


def gaussian(df: pd.DataFrame) -> pd.DataFrame:
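For context on the rewrite above: on a raw ndarray, `std()` without an axis collapses to a single scalar, so `std(axis=0)` is needed to keep the per-column semantics of the old `DataFrame.std()`; and looping `np.corrcoef` over columns avoids the index-alignment work that `DataFrame.corrwith` repeats on every call. A minimal sketch of that trade-off, with illustrative shapes and helper names that are not taken from the PR:

import timeit

import numpy as np
import pandas as pd

# illustrative data: 3250 rows (the size of the PR's benchmark), 2 columns
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.random((3250, 2)), columns=["a", "b"])
transformed = np.sign(df.values) * np.abs(df.values) ** 1.5

def corrwith_check():
    # pandas path: wrap the ndarray back into a DataFrame and align indices
    return pd.DataFrame(transformed, index=df.index, columns=df.columns).corrwith(df)

def corrcoef_check():
    # numpy path: correlate each column pair directly, no alignment overhead
    return np.array(
        [
            np.corrcoef(transformed[:, i], df.values[:, i])[0, 1]
            for i in range(transformed.shape[1])
        ]
    )

print("corrwith:", timeit.timeit(corrwith_check, number=1000))
print("corrcoef:", timeit.timeit(corrcoef_check, number=1000))

On data of this size the NumPy path avoids per-call DataFrame construction and alignment, which is what the benchmark added below measures against the real power implementation.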
50 changes: 50 additions & 0 deletions tests/test_scoring.py
@@ -264,3 +264,53 @@ def test_filter_top_bottom(self):
        )
        np.testing.assert_allclose(top, [3, 4])
        np.testing.assert_allclose(bot, [0, 1])


if __name__ == "__main__":
## Benchmark
import sys
import cProfile
import timeit

s = [x / 4 for x in range(5)] * 650
df = pd.DataFrame({"target": s, "prediction": reversed(s)})

def power_bench():
power(df[["prediction"]], 1.5)

def numerai_corr_bench():
numerai_corr(df[["prediction"]], df["target"], 1.5)

def run_benchmark(bench_func, with_cprofile=False):
if with_cprofile:
# Use cProfile to profile the benchmark
print(f"Profiling {bench_func.__name__} with cProfile:")
profiler = cProfile.Profile()
profiler.enable()
execution_time = timeit.timeit(bench_func, number=1000)
profiler.disable()
profiler.print_stats(sort="cumtime")
else:
# Use timeit to benchmark
execution_time = timeit.timeit(bench_func, number=1000)
print(f"Execution time {bench_func.__name__}: {execution_time:.4f} seconds")

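    # Note: with --profile, cProfile instruments every call, which inflates the
    # timeit measurement; use --profile runs for hotspot attribution and plain
    # runs for comparable wall-clock timings.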
    with_cprofile = "--profile" in sys.argv
    # Check which benchmark to run from command-line arguments
    if "--benchmark" in sys.argv:
        selected_bench = sys.argv[sys.argv.index("--benchmark") + 1]
    else:
        selected_bench = "both"  # Default to running both

    # Run the selected benchmark(s)
    if selected_bench == "power":
        run_benchmark(power_bench, with_cprofile)
    elif selected_bench == "numerai":
        run_benchmark(numerai_corr_bench, with_cprofile)
    elif selected_bench == "both":
        run_benchmark(power_bench, with_cprofile)
        run_benchmark(numerai_corr_bench, with_cprofile)
    else:
        print(
            f"Unknown benchmark: {selected_bench}. Use 'power', 'numerai', or 'both'."
        )
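
Assuming the existing module-level imports in tests/test_scoring.py already provide pd, power, and numerai_corr, the harness can be invoked directly from the repository root, e.g.:

python tests/test_scoring.py --benchmark power
python tests/test_scoring.py --benchmark numerai --profile
python tests/test_scoring.py  # no flags: times both benchmarks with timeit only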