Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,23 @@ You're all set! You can now import and use the statistical tests in your Python

## PySATL Criterion module usage example:

Statistic calculation example:
```python
# import needed criterion from pysatl_criterion
from pysatl_criterion import KolmogorovSmirnovNormalityGofStatistic
from pysatl_criterion.statistics import KolmogorovSmirnovNormalityGofStatistic


# make a criterion object
criterion = KolmogorovSmirnovNormalityGofStatistic(mean=0, var=1)
statistic = KolmogorovSmirnovNormalityGofStatistic(mean=0, var=1)

# initialize test data
x = [0.1, 0.7, 0.5, 0.3]

# then run algorithm
statistic = criterion.execute_statistic(x)
result = statistic.execute_statistic(x)

# print the results
print(f"Statistic result: {statistic}")
print(f"Statistic result: {result}")
# output:
# Statistic result: 0.539827837277029
```
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ dependencies = [
"scipy>=1.11.2",
"pandas>=2.2.1",
"typing-extensions>=4.12.2",
"networkx ==3.4.2"
"networkx == 3.4.2",
"sqlalchemy == 2.0.41"
]

[project.urls]
Expand Down Expand Up @@ -48,7 +49,6 @@ ruff = "0.7.4"
pytest-mock = "3.14.0"
pre-commit = "4.0.1"
mypy = "1.15.0"
sqlalchemy = "2.0.41"

[tool.isort]
line_length = 100
Expand Down
2 changes: 2 additions & 0 deletions pysatl_criterion/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Database URL of the local limit-distribution storage: an SQLite file named
# "limit_distributions.sqlite" addressed by a relative path.
LOCAL_LIMIT_DISTRIBUTION_URL = "sqlite:///limit_distributions.sqlite"
# Database URL of the remote limit-distribution storage: PostgreSQL database
# "pysatl" on localhost.
# NOTE(review): the URL embeds the default "postgres:postgres" credentials and
# points at localhost - presumably a development default; confirm before relying
# on it outside a local setup.
REMOTE_LIMIT_DISTRIBUTION_URL = "postgresql://postgres:postgres@localhost/pysatl"
38 changes: 38 additions & 0 deletions pysatl_criterion/critical_value/critical_area/critical_areas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from dataclasses import dataclass

from pysatl_criterion.critical_value.critical_area.model import CriticalArea


@dataclass
class LeftCriticalArea(CriticalArea):
    """
    Critical area described by a single threshold value.

    NOTE(review): ``contains`` is true for values at or above the threshold, which
    reads like the complement of a left-tail rejection region - confirm the
    intended meaning against the callers.

    :param critical_value: threshold the checked value is compared with
    """

    # The dataclass-generated __init__ accepts this single field.
    critical_value: float

    def contains(self, value: float) -> bool:
        """
        Check that the value is not below the critical value.

        :param value: value to check
        :return: True if value >= critical_value, False otherwise
        """
        return self.critical_value <= value


@dataclass
class RightCriticalArea(CriticalArea):
    """
    Critical area described by a single threshold value.

    NOTE(review): ``contains`` is true for values at or below the threshold, which
    reads like the complement of a right-tail rejection region - confirm the
    intended meaning against the callers.

    :param critical_value: threshold the checked value is compared with
    """

    # The dataclass-generated __init__ accepts this single field.
    critical_value: float

    def contains(self, value: float) -> bool:
        """
        Check that the value is not above the critical value.

        :param value: value to check
        :return: True if value <= critical_value, False otherwise
        """
        return self.critical_value >= value


@dataclass
class TwoSidedCriticalArea(CriticalArea):
    """
    Critical area described by a lower and an upper threshold.

    NOTE(review): ``contains`` is true for values between the two thresholds
    (inclusive), which reads like the complement of a two-sided rejection region -
    confirm the intended meaning against the callers.

    :param left_cv: lower threshold
    :param right_cv: upper threshold
    """

    # The dataclass-generated __init__ accepts these fields in declaration order.
    left_cv: float
    right_cv: float

    def contains(self, value: float) -> bool:
        """
        Check that the value lies between the two thresholds, bounds included.

        :param value: value to check
        :return: True if left_cv <= value <= right_cv, False otherwise
        """
        return self.left_cv <= value and value <= self.right_cv
11 changes: 11 additions & 0 deletions pysatl_criterion/critical_value/critical_area/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from abc import ABC, abstractmethod


class CriticalArea(ABC):
    """
    Interface of a critical area that can be asked whether it contains a value.
    """

    @abstractmethod
    def contains(self, value: float) -> bool:
        """
        Check whether the critical area contains the given value.

        :param value: value to check
        :return: True if the critical area contains value, False otherwise
        """
Empty file.
35 changes: 35 additions & 0 deletions pysatl_criterion/critical_value/loader/remote_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import logging

from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import (
CriticalValueQuery,
ILimitDistributionStorage,
)


class CriticalValueLoader:
    """
    Copy limit distribution data from a remote storage into a local storage.

    :param local_storage: storage that receives the loaded data
    :param remote_storage: storage that is queried for the data
    """

    def __init__(
        self, local_storage: ILimitDistributionStorage, remote_storage: ILimitDistributionStorage
    ):
        self.__local_storage = local_storage
        self.__remote_storage = remote_storage

    def load(self, criterion_code: str, sample_size: int, sample_size_error: int = 0):
        """
        Fetch data from the remote storage and insert it into the local storage.

        A warning is logged and nothing is inserted when the remote storage
        returns no data for the query.

        :param criterion_code: criterion code
        :param sample_size: sample size
        :param sample_size_error: allowed deviation from sample_size, forwarded to the
            query; intended to match stored sizes within
            [sample_size - sample_size_error, sample_size + sample_size_error]
        """

        logging.info(f"Load criterion {criterion_code} with size {sample_size} from remote")
        fetched = self.__remote_storage.get_data_for_cv(
            CriticalValueQuery(criterion_code, sample_size, sample_size_error)
        )

        # Nothing to cache: report it and leave the local storage untouched.
        if fetched is None:
            logging.warning(
                f"Remote data for criterion {criterion_code} " f"with size {sample_size} not found"
            )
            return

        self.__local_storage.insert_data(fetched)
Empty file.
55 changes: 55 additions & 0 deletions pysatl_criterion/critical_value/resolver/composite_resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing_extensions import override

from pysatl_criterion.critical_value.loader.remote_loader import CriticalValueLoader
from pysatl_criterion.critical_value.resolver.model import CriticalArea, CriticalValueResolver
from pysatl_criterion.critical_value.resolver.storage_resolver import StorageCriticalValueResolver
from pysatl_criterion.statistics.models import HypothesisType


class CompositeCriticalValueResolver(CriticalValueResolver):
    """
    Critical value resolver that asks the local storage first and falls back
    to the remote storage.

    :param local_resolver: resolver backed by the local storage
    :param remote_resolver: resolver backed by the remote storage
    """

    def __init__(
        self,
        local_resolver: StorageCriticalValueResolver,
        remote_resolver: StorageCriticalValueResolver,
    ):
        self._local_resolver = local_resolver
        self._remote_resolver = remote_resolver

    @override
    def resolve(
        self,
        criterion_code: str,
        sample_size: int,
        sl: float,
        alternative: HypothesisType = HypothesisType.RIGHT,
    ) -> CriticalArea | None:
        """
        Resolve the critical area for the given criterion.

        The local resolver is asked first. If it has no answer, the data is loaded
        from the remote storage into the local storage and the local resolver is
        asked once more.

        :param criterion_code: criterion code.
        :param sample_size: sample size.
        :param sl: significance level.
        :param alternative: test alternative

        :return: critical area, or None if the local resolver still has no answer
            after the remote load.
        """

        def lookup_locally() -> CriticalArea | None:
            return self._local_resolver.resolve(criterion_code, sample_size, sl, alternative)

        # First attempt: whatever is already stored locally.
        found = lookup_locally()
        if found is not None:
            return found

        # Fallback: copy the remote data into the local storage, then retry locally.
        local_storage = self._local_resolver.limit_distribution_storage
        remote_storage = self._remote_resolver.limit_distribution_storage
        loader = CriticalValueLoader(local_storage, remote_storage)
        loader.load(criterion_code, sample_size)

        return lookup_locally()
30 changes: 30 additions & 0 deletions pysatl_criterion/critical_value/resolver/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from abc import ABC, abstractmethod

from pysatl_criterion.critical_value.critical_area.model import CriticalArea
from pysatl_criterion.statistics.models import HypothesisType


class CriticalValueResolver(ABC):
    """
    Interface of a critical value resolver: produces the critical area of a criterion.
    """

    @abstractmethod
    def resolve(
        self,
        criterion_code: str,
        sample_size: int,
        sl: float,
        alternative: HypothesisType = HypothesisType.RIGHT,
    ) -> CriticalArea | None:
        """
        Resolve the critical area for the given criterion.

        :param criterion_code: criterion code
        :param sample_size: sample size
        :param sl: significance level
        :param alternative: test alternative

        :return: critical area if it can be resolved, None otherwise
        """
65 changes: 65 additions & 0 deletions pysatl_criterion/critical_value/resolver/storage_resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import numpy as np
import scipy.stats as scipy_stats
from typing_extensions import override

from pysatl_criterion.critical_value.critical_area.critical_areas import (
LeftCriticalArea,
RightCriticalArea,
TwoSidedCriticalArea,
)
from pysatl_criterion.critical_value.critical_area.model import CriticalArea
from pysatl_criterion.critical_value.resolver.model import CriticalValueResolver
from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import (
CriticalValueQuery,
ILimitDistributionStorage,
)
from pysatl_criterion.statistics.models import HypothesisType


class StorageCriticalValueResolver(CriticalValueResolver):
    """
    Critical value resolver backed by a limit distribution storage.

    :param limit_distribution_storage: limit distribution storage
    """

    def __init__(self, limit_distribution_storage: ILimitDistributionStorage):
        self.limit_distribution_storage = limit_distribution_storage

    @override
    def resolve(
        self,
        criterion_code: str,
        sample_size: int,
        sl: float,
        alternative: HypothesisType = HypothesisType.RIGHT,
    ) -> CriticalArea | None:
        """
        Resolve the critical area for the given criterion from the storage.

        The thresholds are empirical quantiles of the stored statistics:
        ``1 - sl`` for the right alternative, ``sl`` for the left alternative and
        ``sl / 2`` with ``1 - sl / 2`` for the two-tailed alternative.

        :param criterion_code: criterion code
        :param sample_size: sample size
        :param sl: significance level
        :param alternative: test alternative

        :return: critical area if the storage has data for the query, None otherwise
        :raises ValueError: if the alternative is not a known hypothesis type
        """

        query = CriticalValueQuery(criterion_code=criterion_code, sample_size=sample_size)
        limit_distribution = self.limit_distribution_storage.get_data_for_cv(query)

        if limit_distribution is None:
            return None

        # Take quantiles of the raw statistics. The support points of an ECDF
        # hold every distinct value only once, so using them would drop the
        # weight of repeated statistics and bias the thresholds whenever the
        # sample contains ties. For tie-free samples the result is the same.
        sample = np.asarray(limit_distribution.results_statistics)

        def quantile(level: float) -> float:
            return float(np.quantile(sample, q=level))

        if alternative == HypothesisType.RIGHT:
            return RightCriticalArea(quantile(1 - sl))
        elif alternative == HypothesisType.LEFT:
            return LeftCriticalArea(quantile(sl))
        elif alternative == HypothesisType.TWO_TAILED:
            left = quantile(sl / 2)
            right = quantile(1 - sl / 2)
            return TwoSidedCriticalArea(left, right)
        else:
            raise ValueError(f"Unknown alternative: {alternative}.")
Empty file.
Empty file.
63 changes: 63 additions & 0 deletions pysatl_criterion/p_value/resolver/calculation_resolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import scipy.stats as scipy_stats
from typing_extensions import override

from pysatl_criterion.p_value.resolver.model import PValueResolver
from pysatl_criterion.persistence.model.limit_distribution.limit_distribution import (
CriticalValueQuery,
ILimitDistributionStorage,
)
from pysatl_criterion.statistics.models import HypothesisType


class CalculationPValueResolver(PValueResolver):
    """
    P-value resolver that evaluates the empirical CDF of stored statistics.

    :param limit_distribution_storage: limit distribution storage
    """

    def __init__(self, limit_distribution_storage: ILimitDistributionStorage):
        self.limit_distribution_storage = limit_distribution_storage

    @override
    def resolve(
        self,
        criterion_code: str,
        sample_size: int,
        statistics_value: float,
        alternative: HypothesisType = HypothesisType.RIGHT,
    ) -> float:
        """
        Calculate the p-value of a statistic from the stored limit distribution.

        :param criterion_code: criterion code
        :param sample_size: sample size
        :param statistics_value: statistics value
        :param alternative: test alternative

        :return: p-value
        :raises ValueError: if the storage has no data for the query or the
            alternative is not a known hypothesis type
        """

        stored = self.limit_distribution_storage.get_data_for_cv(
            CriticalValueQuery(criterion_code=criterion_code, sample_size=sample_size)
        )

        if stored is None:
            raise ValueError(
                f"Limit distribution for criterion {criterion_code} "
                f"and sample size {sample_size} does not exist."
            )

        # Empirical CDF of the stored statistics, evaluated at the observed value.
        empirical = scipy_stats.ecdf(stored.results_statistics)
        lower_tail = float(empirical.cdf.evaluate(statistics_value))

        if alternative == HypothesisType.RIGHT:
            return 1.0 - lower_tail
        if alternative == HypothesisType.TWO_TAILED:
            return 2.0 * min(lower_tail, 1.0 - lower_tail)
        if alternative == HypothesisType.LEFT:
            return lower_tail
        raise ValueError(f"Unknown alternative {alternative}")
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import scipy.stats as scipy_stats
from typing_extensions import override

from pysatl_criterion.p_value.resolver.model import PValueResolver
from pysatl_criterion.persistence.limit_distribution.sqlite.sqlite import (
SQLiteLimitDistributionStorage,
)
Expand All @@ -9,7 +11,7 @@
from pysatl_criterion.statistics.models import HypothesisType


class PValueCalculator:
class LocalPValueResolver(PValueResolver):
"""
P-value calculator.

Expand All @@ -19,7 +21,8 @@ class PValueCalculator:
def __init__(self, limit_distribution_storage: SQLiteLimitDistributionStorage):
self.limit_distribution_storage = limit_distribution_storage

def calculate_p_value(
@override
def resolve(
self,
criterion_code: str,
sample_size: int,
Expand All @@ -45,9 +48,7 @@ def calculate_p_value(
"Limit distribution for given criterion and sample size does not exist."
)

simulation_results = limit_distribution_from_db.results_statistics

ecdf = scipy_stats.ecdf(simulation_results)
ecdf = scipy_stats.ecdf(limit_distribution_from_db.results_statistics)

Comment on lines +51 to 52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Тут же потом надо будет что-то из Core добавлять?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Да, потом будем на Core переезжать, когда он появится

cdf_value = ecdf.cdf.evaluate(statistics_value)

Expand Down
Loading