From 5e69939e2d9bae0a1ab4bfbb1e9d0b04031524a3 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 10:53:14 +0300 Subject: [PATCH 1/9] Refactor --- README.md | 10 +- pyproject.toml | 4 +- pysatl_criterion/constants.py | 2 + .../__init__.py | 0 .../critical_area}/__init__.py | 0 .../critical_area/critical_areas.py | 38 +++ .../critical_value/critical_area/model.py | 10 + .../cv_calculator/cv_calculator.py | 0 .../critical_value/loader/__init__.py | 0 .../critical_value/loader/remote_loader.py | 35 +++ .../critical_value/resolver/__init__.py | 0 .../resolver/composite_resolver.py | 55 ++++ .../critical_value/resolver/model.py | 29 +++ .../resolver/storage_resolver.py | 65 +++++ pysatl_criterion/p_value/__init__.py | 0 pysatl_criterion/p_value/resolver/__init__.py | 0 .../p_value/resolver/calculation_resolver.py | 64 +++++ .../resolver/local_resolver.py} | 11 +- pysatl_criterion/p_value/resolver/model.py | 26 ++ .../datastorage/datastorage.py | 2 +- .../limit_distribution/limit_distribution.py | 1 + .../goodness_of_fit_test.py | 74 ++++-- pysatl_criterion/test/model.py | 13 + tests/calc/test_cv.py | 2 +- tests/calc/test_goodness_of_fit_test.py | 95 +++---- tests/calc/test_p_value.py | 115 --------- tests/critical_value/test_critical_areas.py | 45 ++++ tests/critical_value/test_remote_loader.py | 243 ++++++++++++++++++ .../resolver/test_calculation_resolver.py | 198 ++++++++++++++ .../datastorage/datastorage_test.py | 4 +- .../limit_distribution_test.py | 4 +- 31 files changed, 930 insertions(+), 215 deletions(-) create mode 100644 pysatl_criterion/constants.py rename pysatl_criterion/{cv_calculator => critical_value}/__init__.py (100%) rename pysatl_criterion/{p_value_calculator => critical_value/critical_area}/__init__.py (100%) create mode 100644 pysatl_criterion/critical_value/critical_area/critical_areas.py create mode 100644 pysatl_criterion/critical_value/critical_area/model.py rename pysatl_criterion/{cv_calculator => critical_value}/cv_calculator/cv_calculator.py (100%) create mode 100644 pysatl_criterion/critical_value/loader/__init__.py create mode 100644 pysatl_criterion/critical_value/loader/remote_loader.py create mode 100644 pysatl_criterion/critical_value/resolver/__init__.py create mode 100644 pysatl_criterion/critical_value/resolver/composite_resolver.py create mode 100644 pysatl_criterion/critical_value/resolver/model.py create mode 100644 pysatl_criterion/critical_value/resolver/storage_resolver.py create mode 100644 pysatl_criterion/p_value/__init__.py create mode 100644 pysatl_criterion/p_value/resolver/__init__.py create mode 100644 pysatl_criterion/p_value/resolver/calculation_resolver.py rename pysatl_criterion/{p_value_calculator/p_value_calculator/p_value_calculator.py => p_value/resolver/local_resolver.py} (87%) create mode 100644 pysatl_criterion/p_value/resolver/model.py create mode 100644 pysatl_criterion/test/model.py delete mode 100644 tests/calc/test_p_value.py create mode 100644 tests/critical_value/test_critical_areas.py create mode 100644 tests/critical_value/test_remote_loader.py create mode 100644 tests/p_value/resolver/test_calculation_resolver.py diff --git a/README.md b/README.md index 62402ef..25c80ea 100644 --- a/README.md +++ b/README.md @@ -24,21 +24,23 @@ You're all set! You can now import and use the statistical tests in your Python ## PySATL Criterion module usage example: +Statistic calculation example: ```python # import needed criterion from pysatl_criterion -from pysatl_criterion import KolmogorovSmirnovNormalityGofStatistic +from pysatl_criterion.statistics import KolmogorovSmirnovNormalityGofStatistic + # make a criterion object -criterion = KolmogorovSmirnovNormalityGofStatistic(mean=0, var=1) +statistic = KolmogorovSmirnovNormalityGofStatistic(mean=0, var=1) # initialize test data x = [0.1, 0.7, 0.5, 0.3] # then run algorithm -statistic = criterion.execute_statistic(x) +result = statistic.execute_statistic(x) # print the results -print(f"Statistic result: {statistic}") +print(f"Statistic result: {result}") # output: # Statistic result: 0.539827837277029 ``` diff --git a/pyproject.toml b/pyproject.toml index 76d095d..8dbc7ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,8 @@ dependencies = [ "scipy>=1.11.2", "pandas>=2.2.1", "typing-extensions>=4.12.2", - "networkx ==3.4.2" + "networkx == 3.4.2", + "sqlalchemy == 2.0.41" ] [project.urls] @@ -48,7 +49,6 @@ ruff = "0.7.4" pytest-mock = "3.14.0" pre-commit = "4.0.1" mypy = "1.15.0" -sqlalchemy = "2.0.41" [tool.isort] line_length = 100 diff --git a/pysatl_criterion/constants.py b/pysatl_criterion/constants.py new file mode 100644 index 0000000..40286be --- /dev/null +++ b/pysatl_criterion/constants.py @@ -0,0 +1,2 @@ +LOCAL_LIMIT_DISTRIBUTION_URL = "sqlite:///limit_distributions.sqlite" +REMOTE_LIMIT_DISTRIBUTION_URL = "postgresql://postgres:postgres@localhost/pysatl" diff --git a/pysatl_criterion/cv_calculator/__init__.py b/pysatl_criterion/critical_value/__init__.py similarity index 100% rename from pysatl_criterion/cv_calculator/__init__.py rename to pysatl_criterion/critical_value/__init__.py diff --git a/pysatl_criterion/p_value_calculator/__init__.py b/pysatl_criterion/critical_value/critical_area/__init__.py similarity index 100% rename from pysatl_criterion/p_value_calculator/__init__.py rename to pysatl_criterion/critical_value/critical_area/__init__.py diff --git a/pysatl_criterion/critical_value/critical_area/critical_areas.py b/pysatl_criterion/critical_value/critical_area/critical_areas.py new file mode 100644 index 0000000..6adb8d5 --- /dev/null +++ b/pysatl_criterion/critical_value/critical_area/critical_areas.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass + +from pysatl_criterion.critical_value.critical_area.model import CriticalArea + + +@dataclass +class LeftCriticalArea(CriticalArea): + critical_value: float + + def __init__(self, critical_value: float): + self.critical_value = critical_value + + def contains(self, value: float) -> bool: + return value >= self.critical_value + + +@dataclass +class RightCriticalArea(CriticalArea): + critical_value: float + + def __init__(self, critical_value: float): + self.critical_value = critical_value + + def contains(self, value: float) -> bool: + return value <= self.critical_value + + +@dataclass +class TwoSidedCriticalArea(CriticalArea): + left_cv: float + right_cv: float + + def __init__(self, left_cv: float, right_cv: float): + self.left_cv = left_cv + self.right_cv = right_cv + + def contains(self, value: float) -> bool: + return self.left_cv <= value <= self.right_cv diff --git a/pysatl_criterion/critical_value/critical_area/model.py b/pysatl_criterion/critical_value/critical_area/model.py new file mode 100644 index 0000000..129bbb1 --- /dev/null +++ b/pysatl_criterion/critical_value/critical_area/model.py @@ -0,0 +1,10 @@ +from typing import Protocol + + +class CriticalArea(Protocol): + def contains(self, value: float) -> bool: + """ + Check critical area contains value. + :param value: true, if critical area contains value, false otherwise + """ + pass diff --git a/pysatl_criterion/cv_calculator/cv_calculator/cv_calculator.py b/pysatl_criterion/critical_value/cv_calculator/cv_calculator.py similarity index 100% rename from pysatl_criterion/cv_calculator/cv_calculator/cv_calculator.py rename to pysatl_criterion/critical_value/cv_calculator/cv_calculator.py diff --git a/pysatl_criterion/critical_value/loader/__init__.py b/pysatl_criterion/critical_value/loader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pysatl_criterion/critical_value/loader/remote_loader.py b/pysatl_criterion/critical_value/loader/remote_loader.py new file mode 100644 index 0000000..3558032 --- /dev/null +++ b/pysatl_criterion/critical_value/loader/remote_loader.py @@ -0,0 +1,35 @@ +import logging + +from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import ( + CriticalValueQuery, + ILimitDistributionStorage, +) + + +class CriticalValueLoader: + def __init__( + self, local_storage: ILimitDistributionStorage, remote_storage: ILimitDistributionStorage + ): + self.__local_storage = local_storage + self.__remote_storage = remote_storage + + def load(self, criterion_code: str, sample_size: int, sample_size_error: int = 0): + """ + Load data from remote distribution storage to local distribution storage. + + :param criterion_code: criterion code + :param sample_size: sample size + :param sample_size_error: sample size error. + Get sample_size - sample_size_error <= sample_size <= sample_size + sample_size_error + """ + + logging.info(f"Load criterion {criterion_code} with size {sample_size} from remote") + query = CriticalValueQuery(criterion_code, sample_size, sample_size_error) + remote_data = self.__remote_storage.get_data_for_cv(query) + + if remote_data is not None: + self.__local_storage.insert_data(remote_data) + else: + logging.warning( + f"Remote data for criterion {criterion_code} " f"with size {sample_size} not found" + ) diff --git a/pysatl_criterion/critical_value/resolver/__init__.py b/pysatl_criterion/critical_value/resolver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pysatl_criterion/critical_value/resolver/composite_resolver.py b/pysatl_criterion/critical_value/resolver/composite_resolver.py new file mode 100644 index 0000000..ba54d1e --- /dev/null +++ b/pysatl_criterion/critical_value/resolver/composite_resolver.py @@ -0,0 +1,55 @@ +from typing_extensions import override + +from pysatl_criterion.critical_value.loader.remote_loader import CriticalValueLoader +from pysatl_criterion.critical_value.resolver.model import CriticalArea, CriticalValueResolver +from pysatl_criterion.critical_value.resolver.storage_resolver import StorageCriticalValueResolver +from pysatl_criterion.statistics.models import HypothesisType + + +class CompositeCriticalValueResolver(CriticalValueResolver): + """ + Critical value composite resolver. + """ + + def __init__( + self, + local_resolver: StorageCriticalValueResolver, + remote_resolver: StorageCriticalValueResolver, + ): + self._local_resolver = local_resolver + self._remote_resolver = remote_resolver + + @override + def resolve( + self, + criterion_code: str, + sample_size: int, + sl: float, + alternative: HypothesisType = HypothesisType.RIGHT, + ) -> CriticalArea | None: + """ + Resolve critical value for given criterion. + 1. Try to get local value + 2. Try to get remote value and cache it to local storage. + + :param criterion_code: criterion code. + :param sample_size: sample size. + :param sl: significance level. + :param alternative: test alternative + + :return: critical value. + """ + + # 1. Try to get local value + result = self._local_resolver.resolve(criterion_code, sample_size, sl, alternative) + + if result is not None: + return result + + # 2. Try to get remote value and cache it to local storage. + CriticalValueLoader( + self._local_resolver.limit_distribution_storage, + self._remote_resolver.limit_distribution_storage, + ).load(criterion_code, sample_size) + + return self._local_resolver.resolve(criterion_code, sample_size, sl, alternative) diff --git a/pysatl_criterion/critical_value/resolver/model.py b/pysatl_criterion/critical_value/resolver/model.py new file mode 100644 index 0000000..c2af2bb --- /dev/null +++ b/pysatl_criterion/critical_value/resolver/model.py @@ -0,0 +1,29 @@ +from typing import Protocol + +from pysatl_criterion.critical_value.critical_area.model import CriticalArea +from pysatl_criterion.statistics.models import HypothesisType + + +class CriticalValueResolver(Protocol): + """ + Critical value calculator interface. Calculate critical area. + """ + + def resolve( + self, + criterion_code: str, + sample_size: int, + sl: float, + alternative: HypothesisType = HypothesisType.RIGHT, + ) -> CriticalArea | None: + """ + Resolver critical value for given criterion from storage. + + :param criterion_code: criterion code + :param sample_size: sample size + :param sl: significance level + :param alternative: test alternative + + :return: critical value if critical value exists, None otherwise + """ + pass diff --git a/pysatl_criterion/critical_value/resolver/storage_resolver.py b/pysatl_criterion/critical_value/resolver/storage_resolver.py new file mode 100644 index 0000000..d956251 --- /dev/null +++ b/pysatl_criterion/critical_value/resolver/storage_resolver.py @@ -0,0 +1,65 @@ +import numpy as np +import scipy.stats as scipy_stats +from typing_extensions import override + +from pysatl_criterion.critical_value.critical_area.critical_areas import ( + LeftCriticalArea, + RightCriticalArea, + TwoSidedCriticalArea, +) +from pysatl_criterion.critical_value.critical_area.model import CriticalArea +from pysatl_criterion.critical_value.resolver.model import CriticalValueResolver +from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import ( + CriticalValueQuery, + ILimitDistributionStorage, +) +from pysatl_criterion.statistics.models import HypothesisType + + +class StorageCriticalValueResolver(CriticalValueResolver): + """ + Critical value resolver. + + :param limit_distribution_storage: limit distribution storage + """ + + def __init__(self, limit_distribution_storage: ILimitDistributionStorage): + self.limit_distribution_storage = limit_distribution_storage + + @override + def resolve( + self, + criterion_code: str, + sample_size: int, + sl: float, + alternative: HypothesisType = HypothesisType.RIGHT, + ) -> CriticalArea | None: + """ + Resolver critical value for given criterion from storage. + + :param criterion_code: criterion code + :param sample_size: sample size + :param sl: significance level + :param alternative: test alternative + + :return: critical value if critical value exists, None otherwise + """ + + query = CriticalValueQuery(criterion_code=criterion_code, sample_size=sample_size) + limit_distribution = self.limit_distribution_storage.get_data_for_cv(query) + + if limit_distribution is None: + return None + + ecdf = scipy_stats.ecdf(limit_distribution.results_statistics) + + if alternative == HypothesisType.RIGHT: + return RightCriticalArea(float(np.quantile(ecdf.cdf.quantiles, q=1 - sl))) + elif alternative == HypothesisType.LEFT: + return LeftCriticalArea(float(np.quantile(ecdf.cdf.quantiles, q=sl))) + elif alternative == HypothesisType.TWO_TAILED: + left = float(np.quantile(ecdf.cdf.quantiles, q=sl / 2)) + right = float(np.quantile(ecdf.cdf.quantiles, q=1 - sl / 2)) + return TwoSidedCriticalArea(left, right) + else: + raise ValueError(f"Unknown alternative: {alternative}.") diff --git a/pysatl_criterion/p_value/__init__.py b/pysatl_criterion/p_value/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pysatl_criterion/p_value/resolver/__init__.py b/pysatl_criterion/p_value/resolver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pysatl_criterion/p_value/resolver/calculation_resolver.py b/pysatl_criterion/p_value/resolver/calculation_resolver.py new file mode 100644 index 0000000..964a2b4 --- /dev/null +++ b/pysatl_criterion/p_value/resolver/calculation_resolver.py @@ -0,0 +1,64 @@ +import numpy as np +import scipy.stats as scipy_stats +from typing_extensions import override + +from pysatl_criterion.p_value.resolver.model import PValueResolver +from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import ( + CriticalValueQuery, + ILimitDistributionStorage, +) +from pysatl_criterion.statistics.models import HypothesisType + + +class CalculationPValueResolver(PValueResolver): + """ + P-value calculator. + + :param limit_distribution_storage: limit distribution storage + """ + + def __init__(self, limit_distribution_storage: ILimitDistributionStorage): + self.limit_distribution_storage = limit_distribution_storage + + @override + def resolve( + self, + criterion_code: str, + sample_size: int, + statistics_value: float, + alternative: HypothesisType = HypothesisType.RIGHT, + ) -> float: + """ + Calculate p-value. + + :param criterion_code: criterion code + :param sample_size: sample size + :param statistics_value: statistics value + :param alternative: test alternative + + :return: p-value + """ + + query = CriticalValueQuery(criterion_code=criterion_code, sample_size=sample_size) + limit_distribution_from_db = self.limit_distribution_storage.get_data_for_cv(query) + + if limit_distribution_from_db is None: + raise ValueError( + f"Limit distribution for criterion {criterion_code} " + f"and sample size {sample_size} does not exist." + ) + + simulation_results = limit_distribution_from_db.results_statistics + + ecdf = scipy_stats.ecdf(simulation_results) + + cdf_value = float(ecdf.cdf.evaluate(statistics_value)) + + if alternative == HypothesisType.RIGHT: + return 1.0 - cdf_value + elif alternative == HypothesisType.TWO_TAILED: + return 2.0 * min(cdf_value, 1.0 - cdf_value) + elif alternative == HypothesisType.LEFT: + return cdf_value + else: + raise ValueError(f"Unknown alternative {alternative}") diff --git a/pysatl_criterion/p_value_calculator/p_value_calculator/p_value_calculator.py b/pysatl_criterion/p_value/resolver/local_resolver.py similarity index 87% rename from pysatl_criterion/p_value_calculator/p_value_calculator/p_value_calculator.py rename to pysatl_criterion/p_value/resolver/local_resolver.py index 91996d5..0d36438 100644 --- a/pysatl_criterion/p_value_calculator/p_value_calculator/p_value_calculator.py +++ b/pysatl_criterion/p_value/resolver/local_resolver.py @@ -1,5 +1,7 @@ import scipy.stats as scipy_stats +from typing_extensions import override +from pysatl_criterion.p_value.resolver.model import PValueResolver from pysatl_criterion.persistence.limit_distribution.sqlite.sqlite import ( SQLiteLimitDistributionStorage, ) @@ -9,7 +11,7 @@ from pysatl_criterion.statistics.models import HypothesisType -class PValueCalculator: +class LocalPValueResolver(PValueResolver): """ P-value calculator. @@ -19,7 +21,8 @@ class PValueCalculator: def __init__(self, limit_distribution_storage: SQLiteLimitDistributionStorage): self.limit_distribution_storage = limit_distribution_storage - def calculate_p_value( + @override + def resolve( self, criterion_code: str, sample_size: int, @@ -45,9 +48,7 @@ def calculate_p_value( "Limit distribution for given criterion and sample size does not exist." ) - simulation_results = limit_distribution_from_db.results_statistics - - ecdf = scipy_stats.ecdf(simulation_results) + ecdf = scipy_stats.ecdf(limit_distribution_from_db.results_statistics) cdf_value = ecdf.cdf.evaluate(statistics_value) diff --git a/pysatl_criterion/p_value/resolver/model.py b/pysatl_criterion/p_value/resolver/model.py new file mode 100644 index 0000000..7671bd5 --- /dev/null +++ b/pysatl_criterion/p_value/resolver/model.py @@ -0,0 +1,26 @@ +from typing_extensions import Protocol + +from pysatl_criterion.statistics.models import HypothesisType + + +class PValueResolver(Protocol): + """ + P-value resolver. + """ + + def resolve( + self, + criterion_code: str, + sample_size: int, + statistics_value: float, + alternative: HypothesisType = HypothesisType.RIGHT, + ) -> float | None: + """ + Resolve p-value. + + :param criterion_code: criterion code + :param sample_size: sample size + :param statistics_value: statistic value + :param alternative: alternative + """ + pass diff --git a/pysatl_criterion/persistence/limit_distribution/datastorage/datastorage.py b/pysatl_criterion/persistence/limit_distribution/datastorage/datastorage.py index 658180d..20af076 100644 --- a/pysatl_criterion/persistence/limit_distribution/datastorage/datastorage.py +++ b/pysatl_criterion/persistence/limit_distribution/datastorage/datastorage.py @@ -12,7 +12,7 @@ from pysatl_criterion.persistence.model.orm.orm import Base, LimitDistributionORM -class SQLAlchemyLimitDistributionStorage(ILimitDistributionStorage): +class AlchemyLimitDistributionStorage(ILimitDistributionStorage): """ SQLAlchemy-based implementation of ILimitDistributionStorage. """ diff --git a/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py b/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py index 80ed8e4..1df749a 100644 --- a/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py +++ b/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py @@ -30,6 +30,7 @@ class LimitDistributionQuery(DataQuery): class CriticalValueQuery(DataQuery): criterion_code: str sample_size: int + sample_size_error: int = 0 class ILimitDistributionStorage( diff --git a/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py b/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py index eeb18d9..372474a 100644 --- a/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py +++ b/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py @@ -1,12 +1,17 @@ -from pysatl_criterion.cv_calculator.cv_calculator.cv_calculator import CVCalculator -from pysatl_criterion.p_value_calculator.p_value_calculator.p_value_calculator import ( - PValueCalculator, +from pysatl_criterion.constants import LOCAL_LIMIT_DISTRIBUTION_URL +from pysatl_criterion.critical_value.resolver.composite_resolver import ( + CompositeCriticalValueResolver, ) -from pysatl_criterion.persistence.limit_distribution.sqlite.sqlite import ( - SQLiteLimitDistributionStorage, +from pysatl_criterion.critical_value.resolver.model import CriticalValueResolver +from pysatl_criterion.critical_value.resolver.storage_resolver import StorageCriticalValueResolver +from pysatl_criterion.p_value.resolver.calculation_resolver import CalculationPValueResolver +from pysatl_criterion.p_value.resolver.model import PValueResolver +from pysatl_criterion.persistence.limit_distribution.datastorage.datastorage import ( + AlchemyLimitDistributionStorage, ) from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic from pysatl_criterion.statistics.models import HypothesisType +from pysatl_criterion.test.model import TestMethod class GoodnessOfFitTest: @@ -24,51 +29,74 @@ def __init__( self, statistics: AbstractGoodnessOfFitStatistic, significance_level: float, - db_connection_string: str = "sqlite:///limit_distributions.sqlite", - test_method: str = "critical_value", + cv_resolver: CriticalValueResolver | None = None, + p_value_resolver: PValueResolver | None = None, + test_method: TestMethod = TestMethod.CRITICAL_VALUE, alternative: HypothesisType = HypothesisType.RIGHT, ): self.statistics = statistics self.significance_level = significance_level - self.db_connection_string = db_connection_string self.test_method = test_method self.alternative = alternative + if cv_resolver is None and test_method == TestMethod.CRITICAL_VALUE: + cv_local_storage = AlchemyLimitDistributionStorage(LOCAL_LIMIT_DISTRIBUTION_URL) + cv_local_storage.init() + + cv_remote_storage = AlchemyLimitDistributionStorage(LOCAL_LIMIT_DISTRIBUTION_URL) + cv_remote_storage.init() + + cv_resolver = CompositeCriticalValueResolver( + StorageCriticalValueResolver(cv_local_storage), + StorageCriticalValueResolver(cv_remote_storage), + ) + + if p_value_resolver is None and test_method == TestMethod.P_VALUE: + p_storage = AlchemyLimitDistributionStorage(LOCAL_LIMIT_DISTRIBUTION_URL) + p_storage.init() + + p_value_resolver = CalculationPValueResolver(p_storage) + + self.cv_calculator = cv_resolver + self.p_value_resolver = p_value_resolver + def test(self, data: list[float]) -> bool: """ Perform goodness of fit. - :param data: data. + :param data: data to test. - :return: True if data is good, False otherwise. + :return: True if data is fitted distribution, False otherwise. """ - limit_distribution_storage = SQLiteLimitDistributionStorage(self.db_connection_string) - limit_distribution_storage.init() - data_size = len(data) criterion_code = self.statistics.code() statistics_value = self.statistics.execute_statistic(data) - if self.test_method == "critical_value": - cv_calculator = CVCalculator(limit_distribution_storage) - - critical_values = cv_calculator.calculate_critical_value( + if self.test_method == TestMethod.CRITICAL_VALUE: + critical_area = self.cv_calculator.resolve( criterion_code, data_size, self.significance_level, self.alternative, ) - return self.alternative.check_hypothesis(statistics_value, critical_values) - elif self.test_method == "p_value": - p_value_calculator = PValueCalculator(limit_distribution_storage) - p_value = p_value_calculator.calculate_p_value( + if critical_area is None: + raise ValueError( + f"Limit distribution for criterion {criterion_code} and " + f"sample size {data_size} does not exist." + ) + + return critical_area.contains(statistics_value) + + elif self.test_method == TestMethod.P_VALUE: + p_value = self.p_value_resolver.resolve( criterion_code, data_size, statistics_value, self.alternative, ) - return p_value >= self.significance_level + + return p_value is not None and p_value >= self.significance_level else: - raise ValueError("Invalid test method.") + raise ValueError(f"Invalid test method {self.test_method}.") diff --git a/pysatl_criterion/test/model.py b/pysatl_criterion/test/model.py new file mode 100644 index 0000000..1b06199 --- /dev/null +++ b/pysatl_criterion/test/model.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class TestMethod(Enum): + # Disables this test from being run + __test__ = False + + """ + Test methods for hypotheses. + """ + + CRITICAL_VALUE = "critical_value" + P_VALUE = "p_value" diff --git a/tests/calc/test_cv.py b/tests/calc/test_cv.py index b72a924..c0019cd 100644 --- a/tests/calc/test_cv.py +++ b/tests/calc/test_cv.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pysatl_criterion.cv_calculator.cv_calculator.cv_calculator import CVCalculator +from pysatl_criterion.critical_value.cv_calculator.cv_calculator import CVCalculator from pysatl_criterion.statistics.models import HypothesisType diff --git a/tests/calc/test_goodness_of_fit_test.py b/tests/calc/test_goodness_of_fit_test.py index e67b84a..0ec80c2 100644 --- a/tests/calc/test_goodness_of_fit_test.py +++ b/tests/calc/test_goodness_of_fit_test.py @@ -1,59 +1,55 @@ +from typing import cast from unittest.mock import MagicMock +from pysatl_criterion.critical_value.critical_area.critical_areas import RightCriticalArea +from pysatl_criterion.critical_value.resolver.model import CriticalValueResolver +from pysatl_criterion.p_value.resolver.model import PValueResolver from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic from pysatl_criterion.statistics.models import HypothesisType from pysatl_criterion.test.goodness_of_fit_test.goodness_of_fit_test import GoodnessOfFitTest +from pysatl_criterion.test.model import TestMethod -MODULE_PATH = "pysatl_criterion.test.goodness_of_fit_test.goodness_of_fit_test" - - -def test_goodness_of_fit_cv_path_accepts_hypothesis(mocker): +def test_goodness_of_fit_cv_path_accepts_hypothesis(): mock_statistic = MagicMock(spec=AbstractGoodnessOfFitStatistic) mock_statistic.code.return_value = "test_criterion" mock_statistic.execute_statistic.return_value = 10.0 - MockCVCalculator = mocker.patch(f"{MODULE_PATH}.CVCalculator") - MockPValueCalculator = mocker.patch(f"{MODULE_PATH}.PValueCalculator") - mocker.patch(f"{MODULE_PATH}.SQLiteLimitDistributionStorage") - - mock_cv_instance = MockCVCalculator.return_value - mock_cv_instance.calculate_critical_value.return_value = 15.0 + # Mock the critical value calculator + mock_cv_calculator = cast(CriticalValueResolver, MagicMock()) + mock_cv_calculator.resolve.return_value = MagicMock( + contains=lambda x: x < 15.0 + ) # Accepts hypothesis gof_test = GoodnessOfFitTest( statistics=mock_statistic, significance_level=0.05, - test_method="critical_value", + test_method=TestMethod.CRITICAL_VALUE, alternative=HypothesisType.RIGHT, + cv_resolver=mock_cv_calculator, ) assert gof_test.test(data=[1, 2, 3]) is True - mock_cv_instance.calculate_critical_value.assert_called_once() - MockPValueCalculator.assert_not_called() -def test_goodness_of_fit_cv_path_rejects_hypothesis(mocker): - mock_statistic = MagicMock(spec=AbstractGoodnessOfFitStatistic) +def test_goodness_of_fit_cv_path_rejects_hypothesis(): + mock_statistic = cast(AbstractGoodnessOfFitStatistic, MagicMock()) mock_statistic.code.return_value = "test_criterion" mock_statistic.execute_statistic.return_value = 10.0 - MockCVCalculator = mocker.patch(f"{MODULE_PATH}.CVCalculator") - MockPValueCalculator = mocker.patch(f"{MODULE_PATH}.PValueCalculator") - mocker.patch(f"{MODULE_PATH}.SQLiteLimitDistributionStorage") - - mock_cv_instance = MockCVCalculator.return_value - mock_cv_instance.calculate_critical_value.return_value = 9.0 + # Mock the critical value calculator + mock_cv_calculator = cast(CriticalValueResolver, MagicMock()) + mock_cv_calculator.resolve.return_value = RightCriticalArea(9) gof_test = GoodnessOfFitTest( statistics=mock_statistic, significance_level=0.05, - test_method="critical_value", + test_method=TestMethod.CRITICAL_VALUE, alternative=HypothesisType.RIGHT, + cv_resolver=mock_cv_calculator, ) assert gof_test.test(data=[1, 2, 3]) is False - mock_cv_instance.calculate_critical_value.assert_called_once() - MockPValueCalculator.assert_not_called() def test_goodness_of_fit_p_value_path_accepts_hypothesis(mocker): @@ -61,54 +57,33 @@ def test_goodness_of_fit_p_value_path_accepts_hypothesis(mocker): mock_statistic.code.return_value = "test_criterion" mock_statistic.execute_statistic.return_value = 10.0 - MockCVCalculator = mocker.patch(f"{MODULE_PATH}.CVCalculator") - MockPValueCalculator = mocker.patch(f"{MODULE_PATH}.PValueCalculator") - mocker.patch(f"{MODULE_PATH}.SQLiteLimitDistributionStorage") - - mock_p_value_instance = MockPValueCalculator.return_value - mock_p_value_instance.calculate_p_value.return_value = 0.1 + # Mock PValueCalculator using the imported class + mock_p_value_calculator = mocker.patch.object(PValueResolver, "__new__") + mock_p_value_instance = cast(PValueResolver, MagicMock()) + mock_p_value_calculator.return_value = mock_p_value_instance + mock_p_value_instance.resolve.return_value = 0.1 gof_test = GoodnessOfFitTest( - statistics=mock_statistic, significance_level=0.05, test_method="p_value" + statistics=mock_statistic, significance_level=0.05, test_method=TestMethod.P_VALUE ) assert gof_test.test(data=[1, 2, 3]) is True - mock_p_value_instance.calculate_p_value.assert_called_once() - MockCVCalculator.assert_not_called() -def test_goodness_of_fit_p_value_path_rejects_hypothesis(mocker): - mock_statistic = MagicMock(spec=AbstractGoodnessOfFitStatistic) +def test_goodness_of_fit_p_value_path_rejects_hypothesis(): + mock_statistic = cast(AbstractGoodnessOfFitStatistic, MagicMock()) mock_statistic.code.return_value = "test_criterion" mock_statistic.execute_statistic.return_value = 10.0 - MockCVCalculator = mocker.patch(f"{MODULE_PATH}.CVCalculator") - MockPValueCalculator = mocker.patch(f"{MODULE_PATH}.PValueCalculator") - mocker.patch(f"{MODULE_PATH}.SQLiteLimitDistributionStorage") - - mock_p_value_instance = MockPValueCalculator.return_value - mock_p_value_instance.calculate_p_value.return_value = 0.01 + # Mock PValueCalculator using the imported class + mock_p_value_instance = cast(PValueResolver, MagicMock()) + mock_p_value_instance.resolve.return_value = 0.01 gof_test = GoodnessOfFitTest( - statistics=mock_statistic, significance_level=0.05, test_method="p_value" + p_value_resolver=mock_p_value_instance, + statistics=mock_statistic, + significance_level=0.05, + test_method=TestMethod.P_VALUE ) assert gof_test.test(data=[1, 2, 3]) is False - mock_p_value_instance.calculate_p_value.assert_called_once() - MockCVCalculator.assert_not_called() - - -# TODO: cannot check raise, because creating "sqlite:/" directory, CI failing -""" -def test_goodness_of_fit_raises_for_invalid_method(): - mock_statistic = MagicMock(spec=AbstractGoodnessOfFitStatistic) - mock_statistic.code.return_value = "test_criterion" - mock_statistic.execute_statistic.return_value = 10.0 - - gof_test = GoodnessOfFitTest( - statistics=mock_statistic, significance_level=0.05, test_method="this_is_wrong" - ) - with pytest.raises(ValueError, match="Invalid test method."): - gof_test.test(data=[1, 2, 3]) - -""" diff --git a/tests/calc/test_p_value.py b/tests/calc/test_p_value.py deleted file mode 100644 index e69b0c1..0000000 --- a/tests/calc/test_p_value.py +++ /dev/null @@ -1,115 +0,0 @@ -from unittest.mock import MagicMock - -import numpy as np -import pytest - -from pysatl_criterion.p_value_calculator.p_value_calculator.p_value_calculator import ( - PValueCalculator, -) -from pysatl_criterion.statistics.models import HypothesisType - - -def test_calc_p_value_right_tailed(): - mock_storage = MagicMock() - mock_distribution = MagicMock() - - mock_distribution.results_statistics = np.array(range(100)) - - mock_storage.get_data_for_cv.return_value = mock_distribution - calculator = PValueCalculator(limit_distribution_storage=mock_storage) - - p_value = calculator.calculate_p_value( - criterion_code="any_code", - sample_size=100, - statistics_value=89.5, - alternative=HypothesisType.RIGHT, - ) - - assert p_value == pytest.approx(0.1) - - -def test_calc_p_value_left_tailed(): - mock_storage = MagicMock() - mock_distribution = MagicMock() - - mock_distribution.results_statistics = np.array(range(100)) - - mock_storage.get_data_for_cv.return_value = mock_distribution - calculator = PValueCalculator(limit_distribution_storage=mock_storage) - - p_value = calculator.calculate_p_value( - criterion_code="any_code", - sample_size=100, - statistics_value=89.5, - alternative=HypothesisType.LEFT, - ) - assert p_value == pytest.approx(0.9) - - -def test_calc_p_value_two_tailed(): - mock_storage = MagicMock() - mock_distribution = MagicMock() - - mock_distribution.results_statistics = np.array(range(100)) - - mock_storage.get_data_for_cv.return_value = mock_distribution - calculator = PValueCalculator(limit_distribution_storage=mock_storage) - - p_value = calculator.calculate_p_value( - criterion_code="any_code", - sample_size=100, - statistics_value=89.5, - alternative=HypothesisType.TWO_TAILED, - ) - - assert p_value == pytest.approx(0.2) - mock_storage.get_data_for_cv.assert_called_once() - - -def test_calculate_p_value_statistic_outside_simulation_range(): - mock_storage = MagicMock() - mock_distribution = MagicMock() - - mock_distribution.results_statistics = np.array([10, 20, 30, 40, 50]) - mock_storage.get_data_for_cv.return_value = mock_distribution - calculator = PValueCalculator(limit_distribution_storage=mock_storage) - - p_value_high = calculator.calculate_p_value("any", 10, statistics_value=100.0) - assert p_value_high == pytest.approx(0.0) - - p_value_low = calculator.calculate_p_value("any", 10, statistics_value=5.0) - assert p_value_low == pytest.approx(1.0) - - -def test_calculate_p_value_raises_limit_distribution_error(): - mock_storage = MagicMock() - mock_storage.get_data_for_cv.return_value = None - - calculator = PValueCalculator(limit_distribution_storage=mock_storage) - - with pytest.raises( - ValueError, match="Limit distribution for given criterion and sample size does not exist." - ): - calculator.calculate_p_value( - criterion_code="any_code", - sample_size=100, - statistics_value=89.5, - ) - mock_storage.get_data_for_cv.assert_called_once() - - -def test_calculate_p_value_raises_unknown_alternative(): - mock_storage = MagicMock() - mock_distribution = MagicMock() - - mock_distribution.results_statistics = np.array(range(100)) - mock_storage.get_data_for_cv.return_value = mock_distribution - calculator = PValueCalculator(limit_distribution_storage=mock_storage) - with pytest.raises(ValueError, match="Unknown alternative"): - calculator.calculate_p_value( - criterion_code="any_code", - sample_size=100, - statistics_value=89.5, - alternative="not_valid", - ) - mock_storage.get_data_for_cv.assert_called_once() diff --git a/tests/critical_value/test_critical_areas.py b/tests/critical_value/test_critical_areas.py new file mode 100644 index 0000000..5119a3e --- /dev/null +++ b/tests/critical_value/test_critical_areas.py @@ -0,0 +1,45 @@ +import pytest + +from pysatl_criterion.critical_value.critical_area.critical_areas import ( + LeftCriticalArea, + RightCriticalArea, + TwoSidedCriticalArea, +) + + +def test_left_critical_area_contains_expected_values(): + critical_value = 3.0 + area = LeftCriticalArea(critical_value) + + assert area.critical_value == critical_value + assert area.contains(3.0) + assert area.contains(5.5) + assert not area.contains(2.999) + + +def test_right_critical_area_contains_expected_values(): + critical_value = 4.0 + area = RightCriticalArea(critical_value) + + assert area.critical_value == critical_value + assert area.contains(4.0) + assert area.contains(-10.0) + assert not area.contains(4.001) + + +@pytest.mark.parametrize( + ("left_cv", "right_cv", "value", "is_inside"), + [ + (1.0, 5.0, 1.0, True), + (1.0, 5.0, 5.0, True), + (1.0, 5.0, 3.0, True), + (1.0, 5.0, 0.999, False), + (1.0, 5.0, 5.001, False), + ], +) +def test_two_sided_critical_area_contains_range(left_cv, right_cv, value, is_inside): + area = TwoSidedCriticalArea(left_cv, right_cv) + + assert area.left_cv == left_cv + assert area.right_cv == right_cv + assert area.contains(value) is is_inside diff --git a/tests/critical_value/test_remote_loader.py b/tests/critical_value/test_remote_loader.py new file mode 100644 index 0000000..94b3197 --- /dev/null +++ b/tests/critical_value/test_remote_loader.py @@ -0,0 +1,243 @@ +import pytest + +from pysatl_criterion.critical_value.loader.remote_loader import CriticalValueLoader +from pysatl_criterion.persistence.limit_distribution.datastorage.datastorage import ( + AlchemyLimitDistributionStorage, +) +from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import ( + CriticalValueQuery, + LimitDistributionModel, +) + + +@pytest.fixture +def local_storage(): + """Create local storage using SQLAlchemy in-memory database.""" + storage = AlchemyLimitDistributionStorage("sqlite:///:memory:") + storage.init() + return storage + + +@pytest.fixture +def remote_storage(): + """Create remote storage using SQLAlchemy in-memory database.""" + storage = AlchemyLimitDistributionStorage("sqlite:///:memory:") + storage.init() + return storage + + +@pytest.fixture +def loader(local_storage, remote_storage): + """Create CriticalValueLoader instance with real storages.""" + return CriticalValueLoader(local_storage, remote_storage) + + +@pytest.fixture +def sample_query(): + """Create a sample CriticalValueQuery for testing.""" + return CriticalValueQuery(criterion_code="test_criterion", sample_size=100) + + +@pytest.fixture +def sample_model(): + """Create a sample LimitDistributionModel for testing.""" + return LimitDistributionModel( + experiment_id=1, + criterion_code="test_criterion", + criterion_parameters=[1.0, 2.0], + sample_size=100, + monte_carlo_count=1000, + results_statistics=[0.1, 0.2, 0.3, 0.4, 0.5], + ) + + +def test_load_success_with_remote_data( + loader, sample_query, sample_model, remote_storage, local_storage +): + """Test load function when remote data is found and successfully inserted to local storage.""" + # Arrange - insert data into remote storage + remote_storage.insert_data(sample_model) + + # Act + loader.load(sample_query.criterion_code, sample_query.sample_size) + + # Assert - verify data was copied to local storage + local_data = local_storage.get_data_for_cv(sample_query) + assert local_data is not None + assert local_data.criterion_code == sample_model.criterion_code + assert local_data.sample_size == sample_model.sample_size + assert local_data.results_statistics == sample_model.results_statistics + + +def test_load_no_remote_data(loader, sample_query, local_storage): + """Test load function when remote data is not found (returns None).""" + # Act + loader.load(sample_query.criterion_code, sample_query.sample_size) + + # Assert - verify no data was inserted into local storage + local_data = local_storage.get_data_for_cv(sample_query) + assert local_data is None + + +def test_load_storage_interactions_correct_order( + loader, sample_query, sample_model, remote_storage, local_storage +): + """Test that storage methods are called in the correct order and data flows correctly.""" + # Arrange + remote_storage.insert_data(sample_model) + + # Act + loader.load(sample_query.criterion_code, sample_query.sample_size) + + # Assert - verify data exists in both storages + remote_data = remote_storage.get_data_for_cv(sample_query) + local_data = local_storage.get_data_for_cv(sample_query) + + assert remote_data is not None + assert local_data is not None + assert local_data.results_statistics == remote_data.results_statistics + + +def test_load_with_different_query_parameters(loader, remote_storage, local_storage): + """Test load function with different query parameters.""" + # Test with different criterion codes and sample sizes + test_cases = [ + ( + CriticalValueQuery(criterion_code="ks_test", sample_size=50), + LimitDistributionModel( + experiment_id=1, + criterion_code="ks_test", + criterion_parameters=[], + sample_size=50, + monte_carlo_count=1000, + results_statistics=[0.1, 0.2, 0.3], + ), + ), + ( + CriticalValueQuery(criterion_code="ad_test", sample_size=200), + LimitDistributionModel( + experiment_id=1, + criterion_code="ad_test", + criterion_parameters=[], + sample_size=200, + monte_carlo_count=1000, + results_statistics=[0.4, 0.5, 0.6], + ), + ), + ( + CriticalValueQuery(criterion_code="cvm_test", sample_size=1000), + LimitDistributionModel( + experiment_id=1, + criterion_code="cvm_test", + criterion_parameters=[], + sample_size=1000, + monte_carlo_count=1000, + results_statistics=[0.7, 0.8, 0.9], + ), + ), + ] + + for query, model in test_cases: + # Arrange + remote_storage.insert_data(model) + + # Act + loader.load(query.criterion_code, query.sample_size) + + # Assert + local_data = local_storage.get_data_for_cv(query) + assert local_data is not None + assert local_data.criterion_code == model.criterion_code + assert local_data.sample_size == model.sample_size + assert local_data.results_statistics == model.results_statistics + + +def test_load_with_empty_model_data(loader, sample_query, remote_storage, local_storage): + """Test load function with empty model data.""" + # Arrange + empty_model = LimitDistributionModel( + experiment_id=1, + criterion_code="test_criterion", + criterion_parameters=[], + sample_size=100, + monte_carlo_count=1000, + results_statistics=[], + ) + remote_storage.insert_data(empty_model) + + # Act + loader.load(sample_query.criterion_code, sample_query.sample_size) + + # Assert + local_data = local_storage.get_data_for_cv(sample_query) + assert local_data is not None + assert local_data.results_statistics == [] + + +def test_load_data_already_exists_locally( + loader, sample_query, sample_model, remote_storage, local_storage +): + """Test load function when data already exists in local storage.""" + # Arrange - insert same data into both storages + remote_storage.insert_data(sample_model) + + sample_model_local = LimitDistributionModel( + experiment_id=1, + criterion_code="ks_test", + criterion_parameters=[], + sample_size=100, + monte_carlo_count=1000, + results_statistics=[0.1, 0.2], + ) + local_storage.insert_data(sample_model_local) + + # Act + loader.load(sample_query.criterion_code, sample_query.sample_size) + + # Assert - verify data still exists and is correct + local_data = local_storage.get_data_for_cv(sample_query) + assert local_data is not None + assert local_data.results_statistics == sample_model.results_statistics + + +def test_load_multiple_criteria_same_sample_size(loader, remote_storage, local_storage): + """Test load function with multiple criteria but same sample size.""" + # Arrange + models = [ + LimitDistributionModel( + experiment_id=1, + criterion_code="ks_test", + criterion_parameters=[], + sample_size=100, + monte_carlo_count=1000, + results_statistics=[0.1, 0.2], + ), + LimitDistributionModel( + experiment_id=2, + criterion_code="ad_test", + criterion_parameters=[], + sample_size=100, + monte_carlo_count=1000, + results_statistics=[0.3, 0.4], + ), + ] + + for model in models: + remote_storage.insert_data(model) + + # Act - load each criterion + for model in models: + query = CriticalValueQuery( + criterion_code=model.criterion_code, sample_size=model.sample_size + ) + loader.load(query.criterion_code, query.sample_size) + + # Assert - verify all data was loaded correctly + for model in models: + query = CriticalValueQuery( + criterion_code=model.criterion_code, sample_size=model.sample_size + ) + local_data = local_storage.get_data_for_cv(query) + assert local_data is not None + assert local_data.criterion_code == model.criterion_code + assert local_data.results_statistics == model.results_statistics diff --git a/tests/p_value/resolver/test_calculation_resolver.py b/tests/p_value/resolver/test_calculation_resolver.py new file mode 100644 index 0000000..782d1ed --- /dev/null +++ b/tests/p_value/resolver/test_calculation_resolver.py @@ -0,0 +1,198 @@ +""" +Tests for PValueCalculator functionality. + +This module contains comprehensive tests for the PValueCalculator class, +covering different hypothesis types, edge cases, and error conditions. +""" +from typing import cast +from unittest.mock import MagicMock + +import numpy as np +import pytest + +from pysatl_criterion.p_value.resolver.calculation_resolver import CalculationPValueResolver +from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import ( + ILimitDistributionStorage, + LimitDistributionModel, +) +from pysatl_criterion.statistics.models import HypothesisType + + +@pytest.fixture +def mock_storage(): + """Create a mock storage object for testing.""" + return cast(ILimitDistributionStorage, MagicMock()) + +@pytest.fixture +def mock_distribution(): + """Create a mock distribution object with test statistics.""" + mock_dist = MagicMock() + mock_dist.results_statistics = np.array(range(100)) + return mock_dist + +@pytest.fixture +def calculator_with_mock_data(mock_storage, mock_distribution): + """Create a calculator with mocked storage and distribution data.""" + mock_storage.get_data_for_cv.return_value = mock_distribution + return CalculationPValueResolver(mock_storage) + +@pytest.fixture +def calculator_with_empty_storage(mock_storage): + """Create a calculator with empty storage (returns None).""" + mock_storage.get_data_for_cv.return_value = None + return CalculationPValueResolver(mock_storage) + +@pytest.mark.parametrize( + "alternative,expected_p_value,test_name", + [ + (HypothesisType.RIGHT, 0.1, "right_tailed"), + (HypothesisType.LEFT, 0.9, "left_tailed"), + (HypothesisType.TWO_TAILED, 0.2, "two_tailed"), + ], +) +def test_calculate_p_value_for_different_alternatives( + calculator_with_mock_data, alternative, expected_p_value, test_name +): + """ + Test p-value calculation for different hypothesis types. + + Args: + alternative: The hypothesis type to test + expected_p_value: Expected p-value result + test_name: Name of the test case for identification + """ + # Given + criterion_code = "test_criterion" + sample_size = 100 + statistics_value = 89.5 + + # When + p_value = calculator_with_mock_data.resolve( + criterion_code=criterion_code, + sample_size=sample_size, + statistics_value=statistics_value, + alternative=alternative, + ) + + # Then + assert p_value == pytest.approx( + expected_p_value, abs=1e-10 + ), f"Expected p-value {expected_p_value} for {test_name}, got {p_value}" + +def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage): + """ + Test p-value calculation when statistic is outside the simulation range. + + This tests edge cases where the observed statistic is either much higher + or much lower than the simulated distribution range. + """ + + # Given + mock_distribution = cast(LimitDistributionModel, MagicMock()) + mock_distribution.results_statistics = np.array([10, 20, 30, 40, 50]) + mock_storage.get_data_for_cv.return_value = mock_distribution + calculator = CalculationPValueResolver(limit_distribution_storage=mock_storage) + + # When & Then - Statistic much higher than simulation range + p_value_high = calculator.resolve( + criterion_code="test_criterion", sample_size=10, statistics_value=100.0 + ) + assert p_value_high == pytest.approx( + 0.0 + ), "P-value should be 0 for statistic above simulation range" + + # When & Then - Statistic much lower than simulation range + p_value_low = calculator.resolve( + criterion_code="test_criterion", sample_size=10, statistics_value=5.0 + ) + assert p_value_low == pytest.approx( + 1.0 + ), "P-value should be 1 for statistic below simulation range" + +def test_calculate_p_value_raises_error_when_limit_distribution_not_found( + calculator_with_empty_storage +): + """ + Test that appropriate error is raised when limit distribution is not found. + + This tests the error handling when the storage cannot provide + the required limit distribution data. + """ + # Given + criterion_code = "nonexistent_criterion" + sample_size = 100 + statistics_value = 89.5 + + # When & Then + with pytest.raises( + ValueError, + match="Limit distribution for criterion nonexistent_criterion " + "and sample size 100 does not exist.", + ): + calculator_with_empty_storage.resolve( + criterion_code=criterion_code, + sample_size=sample_size, + statistics_value=statistics_value, + ) + +def test_calculate_p_value_raises_error_for_unknown_alternative(calculator_with_mock_data): + """ + Test that appropriate error is raised for unknown hypothesis alternatives. + + This tests the validation of the alternative parameter to ensure + only valid hypothesis types are accepted. + """ + # Given + criterion_code = "test_criterion" + sample_size = 100 + statistics_value = 89.5 + invalid_alternative = "invalid_hypothesis_type" + + # When & Then + with pytest.raises(ValueError, match="Unknown alternative"): + calculator_with_mock_data.resolve( + criterion_code=criterion_code, + sample_size=sample_size, + statistics_value=statistics_value, + alternative=invalid_alternative, + ) + +@pytest.mark.parametrize( + "statistics_array,statistics_value,expected_p_value", + [ + ([1, 2, 3, 4, 5], 3.0, 0.4), + ([10, 20, 30, 40, 50], 25.0, 0.6), + ([1, 1, 1, 1, 1], 1.0, 0.0), + ], +) +def test_p_value_calculation_with_different_distributions( + statistics_array, statistics_value, expected_p_value +): + """ + Test p-value calculation with various distribution patterns. + + Args: + statistics_array: Array of simulated statistics + statistics_value: Observed statistic value + expected_p_value: Expected p-value result + """ + # Given + mock_storage = cast(ILimitDistributionStorage, MagicMock()) + mock_distribution = cast(LimitDistributionModel, MagicMock()) + mock_distribution.results_statistics = statistics_array + mock_storage.get_data_for_cv.return_value = mock_distribution + calculator = CalculationPValueResolver(mock_storage) + + # When + p_value = calculator.resolve( + criterion_code="test_criterion", + sample_size=len(statistics_array), + statistics_value=statistics_value, + alternative=HypothesisType.RIGHT, + ) + + # Then + assert p_value == pytest.approx( + expected_p_value + ), (f"Failed for statistics_array: {statistics_array}, " + f"statistics_value: {statistics_value} expected {expected_p_value}, got {p_value}") diff --git a/tests/persistence/limit_distribution/datastorage/datastorage_test.py b/tests/persistence/limit_distribution/datastorage/datastorage_test.py index a983729..897b496 100644 --- a/tests/persistence/limit_distribution/datastorage/datastorage_test.py +++ b/tests/persistence/limit_distribution/datastorage/datastorage_test.py @@ -2,13 +2,13 @@ from sqlalchemy import inspect from pysatl_criterion.persistence.limit_distribution.datastorage.datastorage import ( - SQLAlchemyLimitDistributionStorage, + AlchemyLimitDistributionStorage, ) @pytest.fixture def storage(tmp_path): - storage = SQLAlchemyLimitDistributionStorage("sqlite:///:memory:") + storage = AlchemyLimitDistributionStorage("sqlite:///:memory:") storage.init() return storage diff --git a/tests/persistence/limit_distribution/limit_distribution_test.py b/tests/persistence/limit_distribution/limit_distribution_test.py index 060a117..1836f9a 100644 --- a/tests/persistence/limit_distribution/limit_distribution_test.py +++ b/tests/persistence/limit_distribution/limit_distribution_test.py @@ -1,7 +1,7 @@ import pytest from pysatl_criterion.persistence.limit_distribution.datastorage.datastorage import ( - SQLAlchemyLimitDistributionStorage, + AlchemyLimitDistributionStorage, ) from pysatl_criterion.persistence.limit_distribution.sqlite.sqlite import ( SQLiteLimitDistributionStorage, @@ -17,7 +17,7 @@ params=[ pytest.param(lambda: SQLiteLimitDistributionStorage(":memory:"), id="sqlite"), pytest.param( - lambda: SQLAlchemyLimitDistributionStorage("sqlite:///:memory:"), id="sqlalchemy" + lambda: AlchemyLimitDistributionStorage("sqlite:///:memory:"), id="sqlalchemy" ), ] ) From 12769823809a8d02b521a29f4b37643155db1a23 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 11:01:30 +0300 Subject: [PATCH 2/9] Refactor --- .../p_value/resolver/calculation_resolver.py | 1 - .../goodness_of_fit_test.py | 4 +-- tests/calc/test_goodness_of_fit_test.py | 2 +- .../resolver/test_calculation_resolver.py | 25 +++++++++++++------ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/pysatl_criterion/p_value/resolver/calculation_resolver.py b/pysatl_criterion/p_value/resolver/calculation_resolver.py index 964a2b4..c36a082 100644 --- a/pysatl_criterion/p_value/resolver/calculation_resolver.py +++ b/pysatl_criterion/p_value/resolver/calculation_resolver.py @@ -1,4 +1,3 @@ -import numpy as np import scipy.stats as scipy_stats from typing_extensions import override diff --git a/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py b/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py index 372474a..6e8df30 100644 --- a/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py +++ b/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py @@ -73,7 +73,7 @@ def test(self, data: list[float]) -> bool: criterion_code = self.statistics.code() statistics_value = self.statistics.execute_statistic(data) - if self.test_method == TestMethod.CRITICAL_VALUE: + if self.cv_calculator and self.test_method == TestMethod.CRITICAL_VALUE: critical_area = self.cv_calculator.resolve( criterion_code, data_size, @@ -89,7 +89,7 @@ def test(self, data: list[float]) -> bool: return critical_area.contains(statistics_value) - elif self.test_method == TestMethod.P_VALUE: + elif self.p_value_resolver and self.test_method == TestMethod.P_VALUE: p_value = self.p_value_resolver.resolve( criterion_code, data_size, diff --git a/tests/calc/test_goodness_of_fit_test.py b/tests/calc/test_goodness_of_fit_test.py index 0ec80c2..251814c 100644 --- a/tests/calc/test_goodness_of_fit_test.py +++ b/tests/calc/test_goodness_of_fit_test.py @@ -83,7 +83,7 @@ def test_goodness_of_fit_p_value_path_rejects_hypothesis(): p_value_resolver=mock_p_value_instance, statistics=mock_statistic, significance_level=0.05, - test_method=TestMethod.P_VALUE + test_method=TestMethod.P_VALUE, ) assert gof_test.test(data=[1, 2, 3]) is False diff --git a/tests/p_value/resolver/test_calculation_resolver.py b/tests/p_value/resolver/test_calculation_resolver.py index 782d1ed..74bb1fc 100644 --- a/tests/p_value/resolver/test_calculation_resolver.py +++ b/tests/p_value/resolver/test_calculation_resolver.py @@ -4,6 +4,7 @@ This module contains comprehensive tests for the PValueCalculator class, covering different hypothesis types, edge cases, and error conditions. """ + from typing import cast from unittest.mock import MagicMock @@ -23,6 +24,7 @@ def mock_storage(): """Create a mock storage object for testing.""" return cast(ILimitDistributionStorage, MagicMock()) + @pytest.fixture def mock_distribution(): """Create a mock distribution object with test statistics.""" @@ -30,18 +32,21 @@ def mock_distribution(): mock_dist.results_statistics = np.array(range(100)) return mock_dist + @pytest.fixture def calculator_with_mock_data(mock_storage, mock_distribution): """Create a calculator with mocked storage and distribution data.""" mock_storage.get_data_for_cv.return_value = mock_distribution return CalculationPValueResolver(mock_storage) + @pytest.fixture def calculator_with_empty_storage(mock_storage): """Create a calculator with empty storage (returns None).""" mock_storage.get_data_for_cv.return_value = None return CalculationPValueResolver(mock_storage) + @pytest.mark.parametrize( "alternative,expected_p_value,test_name", [ @@ -79,6 +84,7 @@ def test_calculate_p_value_for_different_alternatives( expected_p_value, abs=1e-10 ), f"Expected p-value {expected_p_value} for {test_name}, got {p_value}" + def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage): """ Test p-value calculation when statistic is outside the simulation range. @@ -95,7 +101,7 @@ def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage) # When & Then - Statistic much higher than simulation range p_value_high = calculator.resolve( - criterion_code="test_criterion", sample_size=10, statistics_value=100.0 + criterion_code="test_criterion", sample_size=10, statistics_value=100.0 ) assert p_value_high == pytest.approx( 0.0 @@ -103,14 +109,15 @@ def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage) # When & Then - Statistic much lower than simulation range p_value_low = calculator.resolve( - criterion_code="test_criterion", sample_size=10, statistics_value=5.0 + criterion_code="test_criterion", sample_size=10, statistics_value=5.0 ) assert p_value_low == pytest.approx( 1.0 ), "P-value should be 1 for statistic below simulation range" + def test_calculate_p_value_raises_error_when_limit_distribution_not_found( - calculator_with_empty_storage + calculator_with_empty_storage, ): """ Test that appropriate error is raised when limit distribution is not found. @@ -127,7 +134,7 @@ def test_calculate_p_value_raises_error_when_limit_distribution_not_found( with pytest.raises( ValueError, match="Limit distribution for criterion nonexistent_criterion " - "and sample size 100 does not exist.", + "and sample size 100 does not exist.", ): calculator_with_empty_storage.resolve( criterion_code=criterion_code, @@ -135,6 +142,7 @@ def test_calculate_p_value_raises_error_when_limit_distribution_not_found( statistics_value=statistics_value, ) + def test_calculate_p_value_raises_error_for_unknown_alternative(calculator_with_mock_data): """ Test that appropriate error is raised for unknown hypothesis alternatives. @@ -157,6 +165,7 @@ def test_calculate_p_value_raises_error_for_unknown_alternative(calculator_with_ alternative=invalid_alternative, ) + @pytest.mark.parametrize( "statistics_array,statistics_value,expected_p_value", [ @@ -192,7 +201,7 @@ def test_p_value_calculation_with_different_distributions( ) # Then - assert p_value == pytest.approx( - expected_p_value - ), (f"Failed for statistics_array: {statistics_array}, " - f"statistics_value: {statistics_value} expected {expected_p_value}, got {p_value}") + assert p_value == pytest.approx(expected_p_value), ( + f"Failed for statistics_array: {statistics_array}, " + f"statistics_value: {statistics_value} expected {expected_p_value}, got {p_value}" + ) From 7ee63243d5bab466558efb1e9a67785fc28b61ba Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 16:48:17 +0300 Subject: [PATCH 3/9] Fix tests --- tests/p_value/resolver/test_calculation_resolver.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/p_value/resolver/test_calculation_resolver.py b/tests/p_value/resolver/test_calculation_resolver.py index 74bb1fc..2e657be 100644 --- a/tests/p_value/resolver/test_calculation_resolver.py +++ b/tests/p_value/resolver/test_calculation_resolver.py @@ -5,24 +5,19 @@ covering different hypothesis types, edge cases, and error conditions. """ -from typing import cast from unittest.mock import MagicMock import numpy as np import pytest from pysatl_criterion.p_value.resolver.calculation_resolver import CalculationPValueResolver -from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import ( - ILimitDistributionStorage, - LimitDistributionModel, -) from pysatl_criterion.statistics.models import HypothesisType @pytest.fixture def mock_storage(): """Create a mock storage object for testing.""" - return cast(ILimitDistributionStorage, MagicMock()) + return MagicMock() @pytest.fixture @@ -94,7 +89,7 @@ def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage) """ # Given - mock_distribution = cast(LimitDistributionModel, MagicMock()) + mock_distribution = MagicMock() mock_distribution.results_statistics = np.array([10, 20, 30, 40, 50]) mock_storage.get_data_for_cv.return_value = mock_distribution calculator = CalculationPValueResolver(limit_distribution_storage=mock_storage) @@ -186,8 +181,8 @@ def test_p_value_calculation_with_different_distributions( expected_p_value: Expected p-value result """ # Given - mock_storage = cast(ILimitDistributionStorage, MagicMock()) - mock_distribution = cast(LimitDistributionModel, MagicMock()) + mock_storage = MagicMock() + mock_distribution = MagicMock() mock_distribution.results_statistics = statistics_array mock_storage.get_data_for_cv.return_value = mock_distribution calculator = CalculationPValueResolver(mock_storage) From 2855203c5b9bd47e9970653d0411e5a094e49081 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 17:04:09 +0300 Subject: [PATCH 4/9] Fix tests --- .../critical_value/critical_area/model.py | 5 +++-- pysatl_criterion/critical_value/resolver/model.py | 5 +++-- pysatl_criterion/p_value/resolver/model.py | 5 +++-- .../model/common/data_storage/data_storage.py | 11 ++++++++--- .../model/limit_distribution/limit_distribution.py | 7 +++---- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/pysatl_criterion/critical_value/critical_area/model.py b/pysatl_criterion/critical_value/critical_area/model.py index 129bbb1..adefc52 100644 --- a/pysatl_criterion/critical_value/critical_area/model.py +++ b/pysatl_criterion/critical_value/critical_area/model.py @@ -1,7 +1,8 @@ -from typing import Protocol +from abc import ABC, abstractmethod -class CriticalArea(Protocol): +class CriticalArea(ABC): + @abstractmethod def contains(self, value: float) -> bool: """ Check critical area contains value. diff --git a/pysatl_criterion/critical_value/resolver/model.py b/pysatl_criterion/critical_value/resolver/model.py index c2af2bb..e257a44 100644 --- a/pysatl_criterion/critical_value/resolver/model.py +++ b/pysatl_criterion/critical_value/resolver/model.py @@ -1,14 +1,15 @@ -from typing import Protocol +from abc import ABC, abstractmethod from pysatl_criterion.critical_value.critical_area.model import CriticalArea from pysatl_criterion.statistics.models import HypothesisType -class CriticalValueResolver(Protocol): +class CriticalValueResolver(ABC): """ Critical value calculator interface. Calculate critical area. """ + @abstractmethod def resolve( self, criterion_code: str, diff --git a/pysatl_criterion/p_value/resolver/model.py b/pysatl_criterion/p_value/resolver/model.py index 7671bd5..028a3f8 100644 --- a/pysatl_criterion/p_value/resolver/model.py +++ b/pysatl_criterion/p_value/resolver/model.py @@ -1,13 +1,14 @@ -from typing_extensions import Protocol +from abc import ABC, abstractmethod from pysatl_criterion.statistics.models import HypothesisType -class PValueResolver(Protocol): +class PValueResolver(ABC): """ P-value resolver. """ + @abstractmethod def resolve( self, criterion_code: str, diff --git a/pysatl_criterion/persistence/model/common/data_storage/data_storage.py b/pysatl_criterion/persistence/model/common/data_storage/data_storage.py index 5582dff..f47f62a 100644 --- a/pysatl_criterion/persistence/model/common/data_storage/data_storage.py +++ b/pysatl_criterion/persistence/model/common/data_storage/data_storage.py @@ -1,12 +1,14 @@ +from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Protocol, TypeVar +from typing import Generic, TypeVar -class IStorage(Protocol): +class IStorage(ABC): """ Storage interface. """ + @abstractmethod def init(self) -> None: """ Initialize storage. @@ -36,11 +38,12 @@ class DataQuery: Q = TypeVar("Q", contravariant=True, bound=DataQuery) -class IDataStorage(IStorage, Protocol[M, Q]): +class IDataStorage(IStorage, Generic[M, Q], ABC): """ Data storage interface. """ + @abstractmethod def get_data(self, query: Q) -> M | None: """ Get data from data storage. @@ -51,6 +54,7 @@ def get_data(self, query: Q) -> M | None: """ pass + @abstractmethod def insert_data(self, data: M) -> None: """ Insert data to data storage. @@ -61,6 +65,7 @@ def insert_data(self, data: M) -> None: """ pass + @abstractmethod def delete_data(self, query: Q) -> None: """ Delete data from data storage. diff --git a/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py b/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py index 1df749a..3a3e7d5 100644 --- a/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py +++ b/pysatl_criterion/persistence/model/limit_distribution/limit_distribution.py @@ -1,5 +1,5 @@ +from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Protocol from pysatl_criterion.persistence.model.common.data_storage.data_storage import ( DataModel, @@ -33,13 +33,12 @@ class CriticalValueQuery(DataQuery): sample_size_error: int = 0 -class ILimitDistributionStorage( - IDataStorage[LimitDistributionModel, LimitDistributionQuery], Protocol -): +class ILimitDistributionStorage(IDataStorage[LimitDistributionModel, LimitDistributionQuery], ABC): """ Limit distribution storage interface. """ + @abstractmethod def get_data_for_cv(self, query: CriticalValueQuery) -> LimitDistributionModel | None: """ Get limit distribution data for critical value calculation. From 5c258db4a4c52233b66e8a8046693d7347584639 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 17:06:42 +0300 Subject: [PATCH 5/9] Fix tests --- .../test/goodness_of_fit_test/goodness_of_fit_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py b/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py index 6e8df30..9e5b2f7 100644 --- a/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py +++ b/pysatl_criterion/test/goodness_of_fit_test/goodness_of_fit_test.py @@ -22,7 +22,6 @@ class GoodnessOfFitTest: :param significance_level: significance level. :param test_method: test method either 'critical_value' or 'p_value'. :param alternative: test alternative. - """ def __init__( From c600ee367d27bdb13aded351f8fcbb27fdac90bc Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 17:15:48 +0300 Subject: [PATCH 6/9] Fix tests --- tests/p_value/resolver/test_calculation_resolver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/p_value/resolver/test_calculation_resolver.py b/tests/p_value/resolver/test_calculation_resolver.py index 2e657be..c3952ae 100644 --- a/tests/p_value/resolver/test_calculation_resolver.py +++ b/tests/p_value/resolver/test_calculation_resolver.py @@ -17,7 +17,8 @@ @pytest.fixture def mock_storage(): """Create a mock storage object for testing.""" - return MagicMock() + from pysatl_criterion.p_value.resolver.calculation_resolver import ILimitDistributionStorage + return MagicMock(spec=ILimitDistributionStorage) @pytest.fixture From 6b898b4ea3f1dbcb187220b698db648358d5dfd5 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 17:17:32 +0300 Subject: [PATCH 7/9] Fix tests --- tests/p_value/resolver/test_calculation_resolver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/p_value/resolver/test_calculation_resolver.py b/tests/p_value/resolver/test_calculation_resolver.py index c3952ae..b73c046 100644 --- a/tests/p_value/resolver/test_calculation_resolver.py +++ b/tests/p_value/resolver/test_calculation_resolver.py @@ -18,6 +18,7 @@ def mock_storage(): """Create a mock storage object for testing.""" from pysatl_criterion.p_value.resolver.calculation_resolver import ILimitDistributionStorage + return MagicMock(spec=ILimitDistributionStorage) From 16fc1f63516a671e0ecdec8ef52c8bdf66c78772 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 17:26:36 +0300 Subject: [PATCH 8/9] Fix tests --- tests/calc/test_goodness_of_fit_test.py | 88 ++++++++++++------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/tests/calc/test_goodness_of_fit_test.py b/tests/calc/test_goodness_of_fit_test.py index 251814c..6fb3bc8 100644 --- a/tests/calc/test_goodness_of_fit_test.py +++ b/tests/calc/test_goodness_of_fit_test.py @@ -1,87 +1,87 @@ -from typing import cast -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from pysatl_criterion.critical_value.critical_area.critical_areas import RightCriticalArea -from pysatl_criterion.critical_value.resolver.model import CriticalValueResolver -from pysatl_criterion.p_value.resolver.model import PValueResolver -from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic from pysatl_criterion.statistics.models import HypothesisType from pysatl_criterion.test.goodness_of_fit_test.goodness_of_fit_test import GoodnessOfFitTest from pysatl_criterion.test.model import TestMethod -def test_goodness_of_fit_cv_path_accepts_hypothesis(): - mock_statistic = MagicMock(spec=AbstractGoodnessOfFitStatistic) - mock_statistic.code.return_value = "test_criterion" - mock_statistic.execute_statistic.return_value = 10.0 +@patch("pysatl_criterion.critical_value.resolver.model.CriticalValueResolver") +@patch("pysatl_criterion.statistics.goodness_of_fit.AbstractGoodnessOfFitStatistic") +def test_goodness_of_fit_cv_path_accepts_hypothesis(mock_stat_cls, mock_cv_cls): + # Setup mocks + mock_stat = mock_stat_cls.return_value + mock_stat.code.return_value = "test_criterion" + mock_stat.execute_statistic.return_value = 10.0 - # Mock the critical value calculator - mock_cv_calculator = cast(CriticalValueResolver, MagicMock()) - mock_cv_calculator.resolve.return_value = MagicMock( - contains=lambda x: x < 15.0 - ) # Accepts hypothesis + mock_cv = mock_cv_cls.return_value + mock_cv.resolve.return_value = MagicMock(contains=lambda x: x < 15.0) gof_test = GoodnessOfFitTest( - statistics=mock_statistic, + statistics=mock_stat, significance_level=0.05, test_method=TestMethod.CRITICAL_VALUE, alternative=HypothesisType.RIGHT, - cv_resolver=mock_cv_calculator, + cv_resolver=mock_cv, ) assert gof_test.test(data=[1, 2, 3]) is True -def test_goodness_of_fit_cv_path_rejects_hypothesis(): - mock_statistic = cast(AbstractGoodnessOfFitStatistic, MagicMock()) - mock_statistic.code.return_value = "test_criterion" - mock_statistic.execute_statistic.return_value = 10.0 +@patch("pysatl_criterion.critical_value.resolver.model.CriticalValueResolver") +@patch("pysatl_criterion.statistics.goodness_of_fit.AbstractGoodnessOfFitStatistic") +def test_goodness_of_fit_cv_path_rejects_hypothesis(mock_stat_cls, mock_cv_cls): + mock_stat = mock_stat_cls.return_value + mock_stat.code.return_value = "test_criterion" + mock_stat.execute_statistic.return_value = 10.0 - # Mock the critical value calculator - mock_cv_calculator = cast(CriticalValueResolver, MagicMock()) - mock_cv_calculator.resolve.return_value = RightCriticalArea(9) + mock_cv = mock_cv_cls.return_value + mock_cv.resolve.return_value = RightCriticalArea(9) gof_test = GoodnessOfFitTest( - statistics=mock_statistic, + statistics=mock_stat, significance_level=0.05, test_method=TestMethod.CRITICAL_VALUE, alternative=HypothesisType.RIGHT, - cv_resolver=mock_cv_calculator, + cv_resolver=mock_cv, ) assert gof_test.test(data=[1, 2, 3]) is False -def test_goodness_of_fit_p_value_path_accepts_hypothesis(mocker): - mock_statistic = MagicMock(spec=AbstractGoodnessOfFitStatistic) - mock_statistic.code.return_value = "test_criterion" - mock_statistic.execute_statistic.return_value = 10.0 +@patch("pysatl_criterion.p_value.resolver.model.PValueResolver") +@patch("pysatl_criterion.statistics.goodness_of_fit.AbstractGoodnessOfFitStatistic") +def test_goodness_of_fit_p_value_path_accepts_hypothesis(mock_stat_cls, mock_p_value_cls): + mock_stat = mock_stat_cls.return_value + mock_stat.code.return_value = "test_criterion" + mock_stat.execute_statistic.return_value = 10.0 - # Mock PValueCalculator using the imported class - mock_p_value_calculator = mocker.patch.object(PValueResolver, "__new__") - mock_p_value_instance = cast(PValueResolver, MagicMock()) - mock_p_value_calculator.return_value = mock_p_value_instance - mock_p_value_instance.resolve.return_value = 0.1 + mock_p_value = mock_p_value_cls.return_value + mock_p_value.resolve.return_value = 0.1 gof_test = GoodnessOfFitTest( - statistics=mock_statistic, significance_level=0.05, test_method=TestMethod.P_VALUE + statistics=mock_stat, + significance_level=0.05, + test_method=TestMethod.P_VALUE, + p_value_resolver=mock_p_value, ) assert gof_test.test(data=[1, 2, 3]) is True -def test_goodness_of_fit_p_value_path_rejects_hypothesis(): - mock_statistic = cast(AbstractGoodnessOfFitStatistic, MagicMock()) - mock_statistic.code.return_value = "test_criterion" - mock_statistic.execute_statistic.return_value = 10.0 +@patch("pysatl_criterion.p_value.resolver.model.PValueResolver") +@patch("pysatl_criterion.statistics.goodness_of_fit.AbstractGoodnessOfFitStatistic") +def test_goodness_of_fit_p_value_path_rejects_hypothesis(mock_stat_cls, mock_p_value_cls): + mock_stat = mock_stat_cls.return_value + mock_stat.code.return_value = "test_criterion" + mock_stat.execute_statistic.return_value = 10.0 - # Mock PValueCalculator using the imported class - mock_p_value_instance = cast(PValueResolver, MagicMock()) - mock_p_value_instance.resolve.return_value = 0.01 + mock_p_value = mock_p_value_cls.return_value + mock_p_value.resolve.return_value = 0.01 gof_test = GoodnessOfFitTest( - p_value_resolver=mock_p_value_instance, - statistics=mock_statistic, + p_value_resolver=mock_p_value, + statistics=mock_stat, significance_level=0.05, test_method=TestMethod.P_VALUE, ) From 69e2d2873bda2a2be5f4e5d4b8a0b537ce073373 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Fri, 12 Dec 2025 17:30:31 +0300 Subject: [PATCH 9/9] Fix tests --- .../resolver/test_calculation_resolver.py | 197 ++++++------------ 1 file changed, 69 insertions(+), 128 deletions(-) diff --git a/tests/p_value/resolver/test_calculation_resolver.py b/tests/p_value/resolver/test_calculation_resolver.py index b73c046..f03809c 100644 --- a/tests/p_value/resolver/test_calculation_resolver.py +++ b/tests/p_value/resolver/test_calculation_resolver.py @@ -5,7 +5,7 @@ covering different hypothesis types, edge cases, and error conditions. """ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import numpy as np import pytest @@ -14,152 +14,100 @@ from pysatl_criterion.statistics.models import HypothesisType -@pytest.fixture -def mock_storage(): - """Create a mock storage object for testing.""" - from pysatl_criterion.p_value.resolver.calculation_resolver import ILimitDistributionStorage - - return MagicMock(spec=ILimitDistributionStorage) - - -@pytest.fixture -def mock_distribution(): - """Create a mock distribution object with test statistics.""" - mock_dist = MagicMock() - mock_dist.results_statistics = np.array(range(100)) - return mock_dist - +@patch("pysatl_criterion.p_value.resolver.calculation_resolver.ILimitDistributionStorage") +def test_calculate_p_value_for_different_alternatives(mock_storage_cls): + """ + Test p-value calculation for different hypothesis types using mocked storage. + """ + # Setup mock distribution + mock_distribution = MagicMock() + mock_distribution.results_statistics = np.array(range(100)) -@pytest.fixture -def calculator_with_mock_data(mock_storage, mock_distribution): - """Create a calculator with mocked storage and distribution data.""" + # Setup mock storage + mock_storage = mock_storage_cls.return_value mock_storage.get_data_for_cv.return_value = mock_distribution - return CalculationPValueResolver(mock_storage) - - -@pytest.fixture -def calculator_with_empty_storage(mock_storage): - """Create a calculator with empty storage (returns None).""" - mock_storage.get_data_for_cv.return_value = None - return CalculationPValueResolver(mock_storage) + calculator = CalculationPValueResolver(mock_storage) -@pytest.mark.parametrize( - "alternative,expected_p_value,test_name", - [ + test_cases = [ (HypothesisType.RIGHT, 0.1, "right_tailed"), (HypothesisType.LEFT, 0.9, "left_tailed"), (HypothesisType.TWO_TAILED, 0.2, "two_tailed"), - ], -) -def test_calculate_p_value_for_different_alternatives( - calculator_with_mock_data, alternative, expected_p_value, test_name -): - """ - Test p-value calculation for different hypothesis types. - - Args: - alternative: The hypothesis type to test - expected_p_value: Expected p-value result - test_name: Name of the test case for identification - """ - # Given - criterion_code = "test_criterion" - sample_size = 100 - statistics_value = 89.5 - - # When - p_value = calculator_with_mock_data.resolve( - criterion_code=criterion_code, - sample_size=sample_size, - statistics_value=statistics_value, - alternative=alternative, - ) - - # Then - assert p_value == pytest.approx( - expected_p_value, abs=1e-10 - ), f"Expected p-value {expected_p_value} for {test_name}, got {p_value}" - - -def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage): - """ - Test p-value calculation when statistic is outside the simulation range. + ] + + for alternative, expected_p_value, test_name in test_cases: + p_value = calculator.resolve( + criterion_code="test_criterion", + sample_size=100, + statistics_value=89.5, + alternative=alternative, + ) + assert p_value == pytest.approx( + expected_p_value, abs=1e-10 + ), f"Expected p-value {expected_p_value} for {test_name}, got {p_value}" - This tests edge cases where the observed statistic is either much higher - or much lower than the simulated distribution range. - """ - # Given +@patch("pysatl_criterion.p_value.resolver.calculation_resolver.ILimitDistributionStorage") +def test_calculate_p_value_with_statistic_outside_simulation_range(mock_storage_cls): + """Test p-value calculation when statistic is outside the simulation range.""" mock_distribution = MagicMock() mock_distribution.results_statistics = np.array([10, 20, 30, 40, 50]) + + mock_storage = mock_storage_cls.return_value mock_storage.get_data_for_cv.return_value = mock_distribution - calculator = CalculationPValueResolver(limit_distribution_storage=mock_storage) - # When & Then - Statistic much higher than simulation range + calculator = CalculationPValueResolver(mock_storage) + + # Statistic above simulation range p_value_high = calculator.resolve( criterion_code="test_criterion", sample_size=10, statistics_value=100.0 ) - assert p_value_high == pytest.approx( - 0.0 - ), "P-value should be 0 for statistic above simulation range" + assert p_value_high == pytest.approx(0.0) - # When & Then - Statistic much lower than simulation range + # Statistic below simulation range p_value_low = calculator.resolve( criterion_code="test_criterion", sample_size=10, statistics_value=5.0 ) - assert p_value_low == pytest.approx( - 1.0 - ), "P-value should be 1 for statistic below simulation range" + assert p_value_low == pytest.approx(1.0) -def test_calculate_p_value_raises_error_when_limit_distribution_not_found( - calculator_with_empty_storage, -): - """ - Test that appropriate error is raised when limit distribution is not found. +@patch("pysatl_criterion.p_value.resolver.calculation_resolver.ILimitDistributionStorage") +def test_calculate_p_value_raises_error_when_limit_distribution_not_found(mock_storage_cls): + """Test error when limit distribution is not found.""" + mock_storage = mock_storage_cls.return_value + mock_storage.get_data_for_cv.return_value = None - This tests the error handling when the storage cannot provide - the required limit distribution data. - """ - # Given - criterion_code = "nonexistent_criterion" - sample_size = 100 - statistics_value = 89.5 + calculator = CalculationPValueResolver(mock_storage) - # When & Then with pytest.raises( ValueError, - match="Limit distribution for criterion nonexistent_criterion " - "and sample size 100 does not exist.", + match="Limit distribution for criterion nonexistent_criterion and " + "sample size 100 does not exist.", ): - calculator_with_empty_storage.resolve( - criterion_code=criterion_code, - sample_size=sample_size, - statistics_value=statistics_value, + calculator.resolve( + criterion_code="nonexistent_criterion", + sample_size=100, + statistics_value=89.5, ) -def test_calculate_p_value_raises_error_for_unknown_alternative(calculator_with_mock_data): - """ - Test that appropriate error is raised for unknown hypothesis alternatives. +@patch("pysatl_criterion.p_value.resolver.calculation_resolver.ILimitDistributionStorage") +def test_calculate_p_value_raises_error_for_unknown_alternative(mock_storage_cls): + """Test error for unknown hypothesis alternative.""" + mock_distribution = MagicMock() + mock_distribution.results_statistics = np.array(range(100)) - This tests the validation of the alternative parameter to ensure - only valid hypothesis types are accepted. - """ - # Given - criterion_code = "test_criterion" - sample_size = 100 - statistics_value = 89.5 - invalid_alternative = "invalid_hypothesis_type" + mock_storage = mock_storage_cls.return_value + mock_storage.get_data_for_cv.return_value = mock_distribution + + calculator = CalculationPValueResolver(mock_storage) - # When & Then with pytest.raises(ValueError, match="Unknown alternative"): - calculator_with_mock_data.resolve( - criterion_code=criterion_code, - sample_size=sample_size, - statistics_value=statistics_value, - alternative=invalid_alternative, + calculator.resolve( + criterion_code="test_criterion", + sample_size=100, + statistics_value=89.5, + alternative="invalid_hypothesis_type", ) @@ -171,25 +119,19 @@ def test_calculate_p_value_raises_error_for_unknown_alternative(calculator_with_ ([1, 1, 1, 1, 1], 1.0, 0.0), ], ) +@patch("pysatl_criterion.p_value.resolver.calculation_resolver.ILimitDistributionStorage") def test_p_value_calculation_with_different_distributions( - statistics_array, statistics_value, expected_p_value + mock_storage_cls, statistics_array, statistics_value, expected_p_value ): - """ - Test p-value calculation with various distribution patterns. - - Args: - statistics_array: Array of simulated statistics - statistics_value: Observed statistic value - expected_p_value: Expected p-value result - """ - # Given - mock_storage = MagicMock() + """Test p-value calculation with various distributions.""" mock_distribution = MagicMock() mock_distribution.results_statistics = statistics_array + + mock_storage = mock_storage_cls.return_value mock_storage.get_data_for_cv.return_value = mock_distribution + calculator = CalculationPValueResolver(mock_storage) - # When p_value = calculator.resolve( criterion_code="test_criterion", sample_size=len(statistics_array), @@ -197,8 +139,7 @@ def test_p_value_calculation_with_different_distributions( alternative=HypothesisType.RIGHT, ) - # Then assert p_value == pytest.approx(expected_p_value), ( f"Failed for statistics_array: {statistics_array}, " - f"statistics_value: {statistics_value} expected {expected_p_value}, got {p_value}" + f"statistics_value: {statistics_value}, expected {expected_p_value}, got {p_value}" )