From ecdf6fa8e4fc47fe22309669060e3e60c8912fb2 Mon Sep 17 00:00:00 2001 From: Vladimir Zakharov Date: Thu, 27 Nov 2025 22:27:33 +0300 Subject: [PATCH 1/6] initial commit --- pysatl_criterion/statistics/__init__.py | 65 +- pysatl_criterion/statistics/beta.py | 887 ++++++++++++++++++++++++ tests/statistics/test_beta.py | 509 ++++++++++++++ 3 files changed, 1444 insertions(+), 17 deletions(-) create mode 100644 pysatl_criterion/statistics/beta.py create mode 100644 tests/statistics/test_beta.py diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 4aae312..2656887 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -76,6 +76,21 @@ ZhangWuANormalityGofStatistic, ZhangWuCNormalityGofStatistic, ) +from pysatl_criterion.statistics.beta import ( + AbstractBetaGofStatistic, + AndersonDarlingBetaGofStatistic, + Chi2PearsonBetaGofStatistic, + CrammerVonMisesBetaGofStatistic, + EntropyBetaGofStatistic, + KolmogorovSmirnovBetaGofStatistic, + KuiperBetaGofStatistic, + LillieforsTestBetaGofStatistic, + ModeBetaGofStatistic, + MomentBasedBetaGofStatistic, + RatioBetaGofStatistic, + SkewnessKurtosisBetaGofStatistic, + WatsonBetaGofStatistic, +) from pysatl_criterion.statistics.weibull import ( AbstractWeibullGofStatistic, AndersonDarlingWeibullGofStatistic, @@ -105,29 +120,45 @@ "ADStatistic", "Chi2Statistic", "CrammerVonMisesStatistic", - "KolmogorovSmirnovWeibullGofStatistic", - "SBWeibullGofStatistic", - "SbWeibullGofStatistic", - "RSBWeibullGofStatistic", + "KSStatistic", "LillieforsTest", "MinToshiyukiStatistic", - "WPPWeibullGofStatistic", - "NormalizeSpaceWeibullGofStatistic", + # Beta distribution statistics + "AbstractBetaGofStatistic", + "AndersonDarlingBetaGofStatistic", + "Chi2PearsonBetaGofStatistic", + "CrammerVonMisesBetaGofStatistic", + "EntropyBetaGofStatistic", + "KolmogorovSmirnovBetaGofStatistic", + "KuiperBetaGofStatistic", + "LillieforsTestBetaGofStatistic", + 
"ModeBetaGofStatistic", + "MomentBasedBetaGofStatistic", + "RatioBetaGofStatistic", + "SkewnessKurtosisBetaGofStatistic", + "WatsonBetaGofStatistic", + # Weibull distribution statistics "AbstractWeibullGofStatistic", - "TikuSinghWeibullGofStatistic", - "ST2WeibullGofStatistic", - "ST1WeibullGofStatistic", - "SPPWeibullGofStatistic", - "REJGWeibullGofStatistic", - "OKWeibullGofStatistic", - "MSFWeibullGofStatistic", - "MinToshiyukiWeibullGofStatistic", + "AndersonDarlingWeibullGofStatistic", + "Chi2PearsonWeibullGofStatistic", "CrammerVonMisesWeibullGofStatistic", + "KolmogorovSmirnovWeibullGofStatistic", "LillieforsWeibullGofStatistic", "LOSWeibullGofStatistic", - "Chi2PearsonWeibullGofStatistic", - "AndersonDarlingWeibullGofStatistic", - "KSStatistic", + "MinToshiyukiWeibullGofStatistic", + "MSFWeibullGofStatistic", + "NormalizeSpaceWeibullGofStatistic", + "OKWeibullGofStatistic", + "REJGWeibullGofStatistic", + "RSBWeibullGofStatistic", + "SBWeibullGofStatistic", + "SbWeibullGofStatistic", + "SPPWeibullGofStatistic", + "ST1WeibullGofStatistic", + "ST2WeibullGofStatistic", + "TikuSinghWeibullGofStatistic", + "WPPWeibullGofStatistic", + # Exponential distribution statistics "AbstractExponentialityGofStatistic", "AhsanullahExponentialityGofStatistic", "AtkinsonExponentialityGofStatistic", diff --git a/pysatl_criterion/statistics/beta.py b/pysatl_criterion/statistics/beta.py new file mode 100644 index 0000000..b218b36 --- /dev/null +++ b/pysatl_criterion/statistics/beta.py @@ -0,0 +1,887 @@ +from abc import ABC + +import numpy as np +import scipy.stats as scipy_stats +from typing_extensions import override + +from pysatl_criterion.statistics.common import ( + ADStatistic, + Chi2Statistic, + CrammerVonMisesStatistic, + KSStatistic, + LillieforsTest, +) +from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic + + +class AbstractBetaGofStatistic(AbstractGoodnessOfFitStatistic, ABC): + """ + Abstract base class for Beta distribution 
goodness-of-fit statistics. + + The Beta distribution is a continuous probability distribution defined on the interval [0, 1] + parameterized by two positive shape parameters, denoted by α (alpha) and β (beta). + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + """ + + def __init__(self, alpha=1, beta=1): + if alpha <= 0: + raise ValueError("alpha must be positive") + if beta <= 0: + raise ValueError("beta must be positive") + self.alpha = alpha + self.beta = beta + + @staticmethod + @override + def code(): + return f"BETA_{AbstractGoodnessOfFitStatistic.code()}" + + +class KolmogorovSmirnovBetaGofStatistic(AbstractBetaGofStatistic, KSStatistic): + """ + Kolmogorov-Smirnov test statistic for Beta distribution. + + The Kolmogorov-Smirnov test compares the empirical distribution function + with the theoretical Beta distribution function. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + alternative : {'two-sided', 'less', 'greater'}, optional + Defines the alternative hypothesis. Default is 'two-sided'. + mode : {'auto', 'exact', 'approx', 'asymp'}, optional + Defines the distribution used for calculating the p-value. Default is 'auto'. + + Returns + ------- + statistic : float + The Kolmogorov-Smirnov test statistic + + References + ---------- + .. [1] Massey, F. J. (1951). The Kolmogorov-Smirnov test for goodness of fit. + Journal of the American Statistical Association, 46(253), 68-78. + """ + + def __init__(self, alpha=1, beta=1, alternative="two-sided", mode="auto"): + AbstractBetaGofStatistic.__init__(self, alpha, beta) + KSStatistic.__init__(self, alternative, mode) + + @staticmethod + @override + def code(): + return f"KS_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Kolmogorov-Smirnov test statistic. 
+ + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + return KSStatistic.execute_statistic(self, rvs_sorted, cdf_vals) + + +class AndersonDarlingBetaGofStatistic(AbstractBetaGofStatistic, ADStatistic): + """ + Anderson-Darling test statistic for Beta distribution. + + The Anderson-Darling test is a modification of the Kolmogorov-Smirnov test + that gives more weight to the tails of the distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Anderson-Darling test statistic + + References + ---------- + .. [1] Anderson, T. W., & Darling, D. A. (1952). Asymptotic theory of certain + "goodness of fit" criteria based on stochastic processes. + The Annals of Mathematical Statistics, 23(2), 193-212. + """ + + @staticmethod + @override + def code(): + return f"AD_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Anderson-Darling test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + + # Compute log CDF and log SF (survival function) + logcdf = scipy_stats.beta.logcdf(rvs_sorted, self.alpha, self.beta) + logsf = scipy_stats.beta.logsf(rvs_sorted, self.alpha, self.beta) + + i = np.arange(1, n + 1) + A2 = -n - np.sum((2 * i - 1.0) / n * (logcdf + logsf[::-1])) + + return A2 + + +class CrammerVonMisesBetaGofStatistic(AbstractBetaGofStatistic, CrammerVonMisesStatistic): + """ + Cramér-von Mises test statistic for Beta distribution. + + The Cramér-von Mises test is a goodness-of-fit test that measures the + discrepancy between the empirical distribution function and the theoretical + cumulative distribution function. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Cramér-von Mises test statistic + + References + ---------- + .. [1] Cramér, H. (1928). On the composition of elementary errors. + Scandinavian Actuarial Journal, 1928(1), 13-74. + .. [2] von Mises, R. (1928). Wahrscheinlichkeit, Statistik und Wahrheit. + Julius Springer. + """ + + @staticmethod + @override + def code(): + return f"CVM_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Cramér-von Mises test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + return CrammerVonMisesStatistic.execute_statistic(self, rvs_sorted, cdf_vals) + + +class LillieforsTestBetaGofStatistic(AbstractBetaGofStatistic, LillieforsTest): + """ + Lilliefors test statistic for Beta distribution. + + The Lilliefors test is a modification of the Kolmogorov-Smirnov test for + the case when parameters are estimated from the data. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Lilliefors test statistic + + References + ---------- + .. [1] Lilliefors, H. W. (1967). On the Kolmogorov-Smirnov test for normality + with mean and variance unknown. Journal of the American Statistical + Association, 62(318), 399-402. + """ + + @staticmethod + @override + def code(): + return f"LILLIE_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Lilliefors test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + return LillieforsTest.execute_statistic(self, rvs_sorted, cdf_vals) + + +class Chi2PearsonBetaGofStatistic(AbstractBetaGofStatistic, Chi2Statistic): + """ + Pearson's Chi-squared test statistic for Beta distribution. 
+ + The chi-squared test compares observed frequencies with expected frequencies + based on the Beta distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + lambda_ : float, default=1 + Power divergence parameter. Lambda=1 gives Pearson's chi-squared statistic. + + Returns + ------- + statistic : float + The chi-squared test statistic + + References + ---------- + .. [1] Pearson, K. (1900). On the criterion that a given system of deviations from + the probable in the case of a correlated system of variables is such that + it can be reasonably supposed to have arisen from random sampling. + The London, Edinburgh, and Dublin Philosophical Magazine and Journal of + Science, 50(302), 157-175. + """ + + def __init__(self, alpha=1, beta=1, lambda_=1): + AbstractBetaGofStatistic.__init__(self, alpha, beta) + Chi2Statistic.__init__(self) + self.lambda_ = lambda_ + + @staticmethod + @override + def code(): + return f"CHI2_PEARSON_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Pearson's chi-squared test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + + # Number of bins using Sturges' rule + num_bins = int(np.ceil(np.sqrt(n))) + + # Create histogram + observed, bin_edges = np.histogram(rvs_sorted, bins=num_bins, range=(0, 1)) + + # Calculate expected frequencies + expected_cdf = scipy_stats.beta.cdf(bin_edges, self.alpha, self.beta) + expected = np.diff(expected_cdf) * n + + return Chi2Statistic.execute_statistic(self, observed, expected, self.lambda_) + + +class WatsonBetaGofStatistic(AbstractBetaGofStatistic): + """ + Watson test statistic for Beta distribution. + + The Watson test is a modification of the Cramér-von Mises test that is + invariant under location changes. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Watson test statistic + + References + ---------- + .. [1] Watson, G. S. (1961). Goodness-of-fit tests on a circle. + Biometrika, 48(1/2), 109-114. + """ + + @staticmethod + @override + def code(): + return f"W_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Watson test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + + # Cramér-von Mises statistic + u = (2 * np.arange(1, n + 1) - 1) / (2 * n) + cvm = 1 / (12 * n) + np.sum((u - cdf_vals) ** 2) + + # Watson correction + correction_term = n * (np.mean(cdf_vals) - 0.5) ** 2 + watson_statistic = cvm - correction_term + + return watson_statistic + + +class KuiperBetaGofStatistic(AbstractBetaGofStatistic): + """ + Kuiper test statistic for Beta distribution. + + The Kuiper test is a variant of the Kolmogorov-Smirnov test that is + more sensitive to deviations in the tails of the distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Kuiper test statistic (D+ + D-) + + References + ---------- + .. [1] Kuiper, N. H. (1960). Tests concerning random points on a circle. + Indagationes Mathematicae, 63, 38-47. + """ + + @staticmethod + @override + def code(): + return f"KUIPER_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Kuiper test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value (D+ + D-) + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + + # D+ = max(i/n - F(x_i)) + d_plus = np.max(np.arange(1, n + 1) / n - cdf_vals) + + # D- = max(F(x_i) - (i-1)/n) + d_minus = np.max(cdf_vals - np.arange(0, n) / n) + + # Kuiper statistic is D+ + D- + return d_plus + d_minus + + +class MomentBasedBetaGofStatistic(AbstractBetaGofStatistic): + """ + Moment-based test statistic for Beta distribution. + + This test compares the sample moments with the theoretical moments of + the Beta distribution. It uses the first two moments (mean and variance). + + For Beta(α, β): + - Mean: μ = α/(α+β) + - Variance: σ² = αβ/[(α+β)²(α+β+1)] + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The moment-based test statistic + + References + ---------- + .. [1] Johnson, N. L., Kotz, S., & Balakrishnan, N. (1995). + Continuous univariate distributions, volume 2 (Vol. 289). + John Wiley & Sons. + """ + + @staticmethod + @override + def code(): + return f"MB_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the moment-based test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + n = len(rvs) + + # Sample moments + sample_mean = np.mean(rvs) + sample_var = np.var(rvs, ddof=1) + + # Theoretical moments + alpha_beta_sum = self.alpha + self.beta + theoretical_mean = self.alpha / alpha_beta_sum + theoretical_var = (self.alpha * self.beta) / ( + alpha_beta_sum**2 * (alpha_beta_sum + 1) + ) + + # Test statistic based on standardized differences + mean_diff = np.sqrt(n) * (sample_mean - theoretical_mean) / np.sqrt(theoretical_var) + var_diff = np.sqrt(n) * (sample_var - theoretical_var) / np.sqrt(theoretical_var) + + # Combined statistic + statistic = mean_diff**2 + var_diff**2 + + return statistic + + +class SkewnessKurtosisBetaGofStatistic(AbstractBetaGofStatistic): + """ + Skewness-Kurtosis test statistic for Beta distribution. + + This test compares the sample skewness and kurtosis with the theoretical + values of the Beta distribution. + + For Beta(α, β): + - Skewness: γ₁ = 2(β-α)√(α+β+1) / [(α+β+2)√(αβ)] + - Kurtosis: γ₂ = 6[(α-β)²(α+β+1) - αβ(α+β+2)] / [αβ(α+β+2)(α+β+3)] + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The skewness-kurtosis test statistic + + References + ---------- + .. [1] Jarque, C. M., & Bera, A. K. (1980). Efficient tests for normality, + homoscedasticity and serial independence of regression residuals. + Economics Letters, 6(3), 255-259. + """ + + @staticmethod + @override + def code(): + return f"SK_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the skewness-kurtosis test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. 
+ + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + n = len(rvs) + + # Sample skewness and kurtosis + from scipy.stats import skew, kurtosis + sample_skewness = skew(rvs, bias=False) + sample_kurtosis = kurtosis(rvs, bias=False) + + # Theoretical skewness and kurtosis + alpha_beta_sum = self.alpha + self.beta + theoretical_skewness = ( + 2 * (self.beta - self.alpha) * np.sqrt(alpha_beta_sum + 1) / + ((alpha_beta_sum + 2) * np.sqrt(self.alpha * self.beta)) + ) + + numerator = ( + 6 * ((self.alpha - self.beta)**2 * (alpha_beta_sum + 1) - + self.alpha * self.beta * (alpha_beta_sum + 2)) + ) + denominator = ( + self.alpha * self.beta * (alpha_beta_sum + 2) * (alpha_beta_sum + 3) + ) + theoretical_kurtosis = numerator / denominator + + # Test statistic (similar to Jarque-Bera) + skew_diff = (sample_skewness - theoretical_skewness)**2 + kurt_diff = (sample_kurtosis - theoretical_kurtosis)**2 + + statistic = (n / 6) * skew_diff + (n / 24) * kurt_diff + + return statistic + + +class RatioBetaGofStatistic(AbstractBetaGofStatistic): + """ + Ratio test statistic for Beta distribution. + + This test is based on the ratio of geometric mean to arithmetic mean, + which has a known relationship for the Beta distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The ratio test statistic + + References + ---------- + .. [1] Rao, C. R., & Shanbhag, D. N. (1994). Choquet-Deny type functional + equations with applications to stochastic models. John Wiley & Sons. + """ + + @staticmethod + @override + def code(): + return f"RT_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the ratio test statistic. 
+ + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs <= 0) | (rvs >= 1)): + raise ValueError("Beta distribution values must be in the open interval (0, 1) for this test") + + n = len(rvs) + + # Sample statistics + arithmetic_mean = np.mean(rvs) + geometric_mean = np.exp(np.mean(np.log(rvs))) + + # Avoid division by zero + if arithmetic_mean == 0: + raise ValueError("Arithmetic mean is zero, cannot compute ratio") + + sample_ratio = geometric_mean / arithmetic_mean + + # Theoretical ratio for Beta distribution + # E[X] = α/(α+β) + # E[log X] = ψ(α) - ψ(α+β) where ψ is digamma function + from scipy.special import psi + + theoretical_mean = self.alpha / (self.alpha + self.beta) + theoretical_log_mean = psi(self.alpha) - psi(self.alpha + self.beta) + theoretical_ratio = np.exp(theoretical_log_mean) / theoretical_mean + + # Test statistic + statistic = np.sqrt(n) * np.abs(sample_ratio - theoretical_ratio) + + return statistic + + +class EntropyBetaGofStatistic(AbstractBetaGofStatistic): + """ + Entropy-based test statistic for Beta distribution. + + This test compares the sample entropy (estimated using kernel density) + with the theoretical entropy of the Beta distribution. + + The differential entropy of Beta(α, β) is: + H = ln(B(α,β)) - (α-1)ψ(α) - (β-1)ψ(β) + (α+β-2)ψ(α+β) + where B is the Beta function and ψ is the digamma function. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The entropy-based test statistic + + References + ---------- + .. [1] Vasicek, O. (1976). A test for normality based on sample entropy. + Journal of the Royal Statistical Society: Series B (Methodological), + 38(1), 54-59. 
+ """ + + @staticmethod + @override + def code(): + return f"ENT_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the entropy-based test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + m : int, optional + Window size for entropy estimation. Default is n//4. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + n = len(rvs) + m = kwargs.get('m', n // 4) + + # Sort the data + rvs_sorted = np.sort(rvs) + + # Vasicek's entropy estimator + sample_entropy = 0 + for i in range(n): + left = max(0, i - m) + right = min(n - 1, i + m) + window_range = rvs_sorted[right] - rvs_sorted[left] + if window_range > 0: + sample_entropy += np.log(window_range * n / (2 * m)) + + sample_entropy /= n + + # Theoretical entropy + from scipy.special import betaln, psi + + theoretical_entropy = ( + betaln(self.alpha, self.beta) - + (self.alpha - 1) * psi(self.alpha) - + (self.beta - 1) * psi(self.beta) + + (self.alpha + self.beta - 2) * psi(self.alpha + self.beta) + ) + + # Test statistic + statistic = np.sqrt(n) * np.abs(sample_entropy - theoretical_entropy) + + return statistic + + +class ModeBetaGofStatistic(AbstractBetaGofStatistic): + """ + Mode-based test statistic for Beta distribution. + + For Beta(α, β) with α, β > 1, the mode is (α-1)/(α+β-2). + This test compares the sample mode (estimated) with the theoretical mode. + + Parameters + ---------- + alpha : float, default=2 + Shape parameter α > 1 + beta : float, default=2 + Shape parameter β > 1 + + Returns + ------- + statistic : float + The mode-based test statistic + + References + ---------- + .. [1] Chernoff, H. (1964). Estimation of the mode. + Annals of the Institute of Statistical Mathematics, 16(1), 31-41. 
+ """ + + def __init__(self, alpha=2, beta=2): + if alpha <= 1: + raise ValueError("alpha must be greater than 1 for mode to be well-defined") + if beta <= 1: + raise ValueError("beta must be greater than 1 for mode to be well-defined") + super().__init__(alpha, beta) + + @staticmethod + @override + def code(): + return f"MODE_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the mode-based test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + bandwidth : float, optional + Bandwidth for kernel density estimation. If None, uses Scott's rule. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = np.asarray(rvs) + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + + n = len(rvs) + + # Estimate sample mode using kernel density estimation + from scipy.stats import gaussian_kde + + kde = gaussian_kde(rvs) + x_grid = np.linspace(0.01, 0.99, 200) + density = kde(x_grid) + sample_mode = x_grid[np.argmax(density)] + + # Theoretical mode + theoretical_mode = (self.alpha - 1) / (self.alpha + self.beta - 2) + + # Test statistic + statistic = np.sqrt(n) * np.abs(sample_mode - theoretical_mode) + + return statistic diff --git a/tests/statistics/test_beta.py b/tests/statistics/test_beta.py new file mode 100644 index 0000000..ed8c1d6 --- /dev/null +++ b/tests/statistics/test_beta.py @@ -0,0 +1,509 @@ +import numpy as np +import pytest +import scipy.stats as scipy_stats + +from pysatl_criterion.statistics.beta import ( + AbstractBetaGofStatistic, + AndersonDarlingBetaGofStatistic, + Chi2PearsonBetaGofStatistic, + CrammerVonMisesBetaGofStatistic, + EntropyBetaGofStatistic, + KolmogorovSmirnovBetaGofStatistic, + KuiperBetaGofStatistic, + LillieforsTestBetaGofStatistic, + ModeBetaGofStatistic, + MomentBasedBetaGofStatistic, + RatioBetaGofStatistic, + 
"""Tests for the Beta-distribution goodness-of-fit statistics."""

import numpy as np
import pytest
import scipy.stats as scipy_stats

from pysatl_criterion.statistics.beta import (
    AbstractBetaGofStatistic,
    AndersonDarlingBetaGofStatistic,
    Chi2PearsonBetaGofStatistic,
    CrammerVonMisesBetaGofStatistic,
    EntropyBetaGofStatistic,
    KolmogorovSmirnovBetaGofStatistic,
    KuiperBetaGofStatistic,
    LillieforsTestBetaGofStatistic,
    ModeBetaGofStatistic,
    MomentBasedBetaGofStatistic,
    RatioBetaGofStatistic,
    SkewnessKurtosisBetaGofStatistic,
    WatsonBetaGofStatistic,
)


def test_abstract_beta_criterion_code():
    """The abstract Beta base class reports the expected code."""
    assert AbstractBetaGofStatistic.code() == "BETA_GOODNESS_OF_FIT"


def test_abstract_beta_criterion_parameters():
    """Constructor validation of alpha/beta on the abstract base.

    A concrete subclass is used since AbstractBetaGofStatistic is abstract.
    """
    stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=3)
    assert stat.alpha == 2
    assert stat.beta == 3

    # Non-positive alpha is rejected.
    for bad_alpha in (-1, 0):
        with pytest.raises(ValueError, match="alpha must be positive"):
            KolmogorovSmirnovBetaGofStatistic(alpha=bad_alpha, beta=2)

    # Non-positive beta is rejected.
    for bad_beta in (-1, 0):
        with pytest.raises(ValueError, match="beta must be positive"):
            KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=bad_beta)


class TestKolmogorovSmirnovBetaGofStatistic:
    """Tests for the Kolmogorov-Smirnov test statistic."""

    def test_code(self):
        """The KS statistic reports the expected code."""
        assert KolmogorovSmirnovBetaGofStatistic.code() == "KS_BETA_GOODNESS_OF_FIT"

    @pytest.mark.parametrize(
        ("alpha", "beta", "seed", "n"),
        [
            (2, 5, 42, 50),
            (1, 1, 123, 100),  # Uniform distribution
            (0.5, 0.5, 456, 75),
            (5, 2, 789, 200),
        ],
    )
    def test_ks_with_generated_data(self, alpha, beta, seed, n):
        """KS statistic on data drawn from the hypothesized Beta law."""
        np.random.seed(seed)
        sample = scipy_stats.beta.rvs(alpha, beta, size=n)

        value = KolmogorovSmirnovBetaGofStatistic(alpha=alpha, beta=beta).execute_statistic(sample)

        # The statistic is non-negative by construction, and for a correctly
        # specified null it should stay small (probabilistic bound).
        assert value >= 0
        assert value < 0.5  # Reasonable upper bound for good fit

    def test_ks_with_wrong_distribution(self):
        """KS statistic should flag a deliberately mismatched null."""
        np.random.seed(42)
        # Draw from Beta(5, 2) but test against Beta(2, 5).
        sample = scipy_stats.beta.rvs(5, 2, size=100)

        value = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=5).execute_statistic(sample)

        # The mismatch must be detected.
        assert value > 0.1

    def test_ks_validation_errors(self):
        """Inputs outside [0, 1] are rejected."""
        stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=3)

        with pytest.raises(ValueError, match="Beta distribution values must be in the interval"):
            stat.execute_statistic([0.5, 1.5, 0.3])

        with pytest.raises(ValueError, match="Beta distribution values must be in the interval"):
            stat.execute_statistic([-0.1, 0.5, 0.8])


class TestAndersonDarlingBetaGofStatistic:
    """Tests for the Anderson-Darling test statistic."""

    def test_code(self):
        """The AD statistic reports the expected code."""
        assert AndersonDarlingBetaGofStatistic.code() == "AD_BETA_GOODNESS_OF_FIT"

    @pytest.mark.parametrize(
        ("alpha", "beta", "seed", "n"),
        [
            (2, 5, 42, 50),
            (1, 1, 123, 100),
            (3, 3, 456, 75),
        ],
    )
    def test_ad_with_generated_data(self, alpha, beta, seed, n):
        """AD statistic on data drawn from the hypothesized Beta law."""
        np.random.seed(seed)
        sample = scipy_stats.beta.rvs(alpha, beta, size=n)

        value = AndersonDarlingBetaGofStatistic(alpha=alpha, beta=beta).execute_statistic(sample)

        # Must be finite, and small-ish when the null is correct.
        assert np.isfinite(value)
        assert value < 10  # Reasonable upper bound for good fit

    def test_ad_validation_errors(self):
        """Inputs outside [0, 1] are rejected."""
        stat = AndersonDarlingBetaGofStatistic(alpha=2, beta=3)

        with pytest.raises(ValueError, match="Beta distribution values must be in the interval"):
            stat.execute_statistic([0.5, 1.5, 0.3])


class TestCrammerVonMisesBetaGofStatistic:
    """Tests for the Cramér-von Mises test statistic."""

    def test_code(self):
        """The CVM statistic reports the expected code."""
        assert CrammerVonMisesBetaGofStatistic.code() == "CVM_BETA_GOODNESS_OF_FIT"

    @pytest.mark.parametrize(
        ("alpha", "beta", "seed", "n"),
        [
            (2, 5, 42, 50),
            (1, 1, 123, 100),
            (3, 3, 456, 75),
        ],
    )
    def test_cvm_with_generated_data(self, alpha, beta, seed, n):
        """CVM statistic on data drawn from the hypothesized Beta law."""
        np.random.seed(seed)
        sample = scipy_stats.beta.rvs(alpha, beta, size=n)

        value = CrammerVonMisesBetaGofStatistic(alpha=alpha, beta=beta).execute_statistic(sample)

        # Non-negative, and small when the null is correct.
        assert value >= 0
        assert value < 1  # Reasonable upper bound for good fit

    def test_cvm_validation_errors(self):
        """Inputs outside [0, 1] are rejected."""
        stat = CrammerVonMisesBetaGofStatistic(alpha=2, beta=3)

        with pytest.raises(ValueError, match="Beta distribution values must be in the interval"):
            stat.execute_statistic([0.5, 1.5, 0.3])


class TestLillieforsTestBetaGofStatistic:
    """Tests for the Lilliefors test statistic."""

    def test_code(self):
        """The Lilliefors statistic reports the expected code."""
        assert LillieforsTestBetaGofStatistic.code() == "LILLIE_BETA_GOODNESS_OF_FIT"

    @pytest.mark.parametrize(
        ("alpha", "beta", "seed", "n"),
        [
            (2, 5, 42, 50),
            (3, 3, 123, 100),
        ],
    )
    def test_lillie_with_generated_data(self, alpha, beta, seed, n):
        """Lilliefors statistic on data drawn from the hypothesized Beta law."""
        np.random.seed(seed)
        sample = scipy_stats.beta.rvs(alpha, beta, size=n)

        value = LillieforsTestBetaGofStatistic(alpha=alpha, beta=beta).execute_statistic(sample)

        assert value >= 0
        assert value < 0.5


class TestChi2PearsonBetaGofStatistic:
    """Tests for Pearson's Chi-squared test statistic."""

    def test_code(self):
        """The Chi-squared statistic reports the expected code."""
        assert Chi2PearsonBetaGofStatistic.code() == "CHI2_PEARSON_BETA_GOODNESS_OF_FIT"

    @pytest.mark.parametrize(
        ("alpha", "beta", "seed", "n"),
        [
            (2, 5, 42, 100),
            (3, 3, 123, 200),
        ],
    )
    def test_chi2_with_generated_data(self, alpha, beta, seed, n):
        """Chi-squared statistic on data drawn from the hypothesized Beta law."""
        np.random.seed(seed)
        sample = scipy_stats.beta.rvs(alpha, beta, size=n)

        value = Chi2PearsonBetaGofStatistic(alpha=alpha, beta=beta).execute_statistic(sample)

        assert value >= 0
        assert np.isfinite(value)


class TestWatsonBetaGofStatistic:
    """Tests for the Watson test statistic."""

    def test_code(self):
        """The Watson statistic reports the expected code."""
        assert WatsonBetaGofStatistic.code() == "W_BETA_GOODNESS_OF_FIT"

    @pytest.mark.parametrize(
        ("alpha", "beta", "seed", "n"),
        [
            (2, 5, 42, 50),
            (3, 3, 123, 100),
        ],
    )
    def test_watson_with_generated_data(self, alpha, beta, seed, n):
        """Watson statistic on data drawn from the hypothesized Beta law."""
        np.random.seed(seed)
        sample = scipy_stats.beta.rvs(alpha, beta, size=n)

        value = WatsonBetaGofStatistic(alpha=alpha, beta=beta).execute_statistic(sample)

        assert np.isfinite(value)


class TestKuiperBetaGofStatistic:
    """Tests for the Kuiper test statistic."""

    def test_code(self):
        """The Kuiper statistic reports the expected code."""
        # Completes the assertion truncated at the end of the visible source.
        assert KuiperBetaGofStatistic.code() == "KUIPER_BETA_GOODNESS_OF_FIT"
KuiperBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 50), + (3, 3, 123, 100), + ], + ) + def test_kuiper_with_generated_data(self, alpha, beta, seed, n): + """Test Kuiper statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = KuiperBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert statistic_value < 1 # Reasonable upper bound + + +class TestMomentBasedBetaGofStatistic: + """Tests for Moment-based test statistic.""" + + def test_code(self): + """Test that the Moment-based statistic returns correct code.""" + assert "MB_BETA_GOODNESS_OF_FIT" == MomentBasedBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_moment_with_generated_data(self, alpha, beta, seed, n): + """Test Moment-based statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = MomentBasedBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + def test_moment_expected_value(self): + """Test that moment-based statistic is small for correct distribution.""" + np.random.seed(42) + # Generate large sample for stable moments + data = scipy_stats.beta.rvs(2, 5, size=1000) + + stat = MomentBasedBetaGofStatistic(alpha=2, beta=5) + statistic_value = stat.execute_statistic(data) + + # For correct distribution and large sample, should be relatively small + assert statistic_value < 10 + + +class TestSkewnessKurtosisBetaGofStatistic: + """Tests for Skewness-Kurtosis test statistic.""" + + def test_code(self): + """Test that the 
Skewness-Kurtosis statistic returns correct code.""" + assert "SK_BETA_GOODNESS_OF_FIT" == SkewnessKurtosisBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_sk_with_generated_data(self, alpha, beta, seed, n): + """Test Skewness-Kurtosis statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = SkewnessKurtosisBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + +class TestRatioBetaGofStatistic: + """Tests for Ratio test statistic.""" + + def test_code(self): + """Test that the Ratio statistic returns correct code.""" + assert "RT_BETA_GOODNESS_OF_FIT" == RatioBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_ratio_with_generated_data(self, alpha, beta, seed, n): + """Test Ratio statistic with data generated from Beta distribution.""" + np.random.seed(seed) + # Add small noise to avoid exact 0 or 1 + data = scipy_stats.beta.rvs(alpha, beta, size=n) + data = np.clip(data, 0.001, 0.999) + + stat = RatioBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + def test_ratio_validation_errors(self): + """Test that Ratio statistic validates input data.""" + stat = RatioBetaGofStatistic(alpha=2, beta=3) + + # Data with exact 0 or 1 (for log calculation) + with pytest.raises(ValueError, match="Beta distribution values must be in the open interval"): + stat.execute_statistic([0.5, 1.0, 0.3]) + + +class TestEntropyBetaGofStatistic: + """Tests for Entropy-based test statistic.""" + + def test_code(self): + 
"""Test that the Entropy statistic returns correct code.""" + assert "ENT_BETA_GOODNESS_OF_FIT" == EntropyBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_entropy_with_generated_data(self, alpha, beta, seed, n): + """Test Entropy statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = EntropyBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + +class TestModeBetaGofStatistic: + """Tests for Mode-based test statistic.""" + + def test_code(self): + """Test that the Mode statistic returns correct code.""" + assert "MODE_BETA_GOODNESS_OF_FIT" == ModeBetaGofStatistic.code() + + def test_mode_parameters_validation(self): + """Test that Mode statistic validates alpha and beta > 1.""" + # Valid parameters + stat = ModeBetaGofStatistic(alpha=2, beta=2) + assert stat.alpha == 2 + assert stat.beta == 2 + + # Invalid alpha + with pytest.raises(ValueError, match="alpha must be greater than 1"): + ModeBetaGofStatistic(alpha=1, beta=2) + + # Invalid beta + with pytest.raises(ValueError, match="beta must be greater than 1"): + ModeBetaGofStatistic(alpha=2, beta=1) + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 200), + (3, 3, 123, 300), + ], + ) + def test_mode_with_generated_data(self, alpha, beta, seed, n): + """Test Mode statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = ModeBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + +# Integration tests +class TestBetaIntegration: + 
"""Integration tests for Beta distribution statistics.""" + + def test_uniform_distribution(self): + """Test that Beta(1,1) equals uniform distribution.""" + np.random.seed(42) + data = np.random.uniform(0, 1, 100) + + # Should fit well to Beta(1, 1) + ks_stat = KolmogorovSmirnovBetaGofStatistic(alpha=1, beta=1) + ks_value = ks_stat.execute_statistic(data) + + # Should be relatively small + assert ks_value < 0.2 + + def test_all_statistics_run(self): + """Test that all statistics can be computed without errors.""" + np.random.seed(42) + data = scipy_stats.beta.rvs(2, 5, size=100) + # Clip for tests that need open interval + data_clipped = np.clip(data, 0.001, 0.999) + + statistics = [ + KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=5), + AndersonDarlingBetaGofStatistic(alpha=2, beta=5), + CrammerVonMisesBetaGofStatistic(alpha=2, beta=5), + LillieforsTestBetaGofStatistic(alpha=2, beta=5), + Chi2PearsonBetaGofStatistic(alpha=2, beta=5), + WatsonBetaGofStatistic(alpha=2, beta=5), + KuiperBetaGofStatistic(alpha=2, beta=5), + MomentBasedBetaGofStatistic(alpha=2, beta=5), + SkewnessKurtosisBetaGofStatistic(alpha=2, beta=5), + EntropyBetaGofStatistic(alpha=2, beta=5), + ModeBetaGofStatistic(alpha=2, beta=5), + ] + + for stat in statistics: + try: + if isinstance(stat, RatioBetaGofStatistic): + value = stat.execute_statistic(data_clipped) + else: + value = stat.execute_statistic(data) + assert np.isfinite(value), f"Statistic {stat.code()} returned non-finite value" + except Exception as e: + pytest.fail(f"Statistic {stat.code()} raised an exception: {e}") + + # Also test Ratio with clipped data + ratio_stat = RatioBetaGofStatistic(alpha=2, beta=5) + ratio_value = ratio_stat.execute_statistic(data_clipped) + assert np.isfinite(ratio_value) + From e090f1fb9484d24c359fd85c60c083e53fe39fa3 Mon Sep 17 00:00:00 2001 From: Vladimir Zakharov Date: Thu, 27 Nov 2025 22:38:43 +0300 Subject: [PATCH 2/6] fix style --- pysatl_criterion/statistics/__init__.py | 30 ++--- 
pysatl_criterion/statistics/beta.py | 41 +++--- tests/statistics/test_beta.py | 165 ++++++++++++------------ 3 files changed, 119 insertions(+), 117 deletions(-) diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index 2656887..83dfe88 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -1,3 +1,18 @@ +from pysatl_criterion.statistics.beta import ( + AbstractBetaGofStatistic, + AndersonDarlingBetaGofStatistic, + Chi2PearsonBetaGofStatistic, + CrammerVonMisesBetaGofStatistic, + EntropyBetaGofStatistic, + KolmogorovSmirnovBetaGofStatistic, + KuiperBetaGofStatistic, + LillieforsTestBetaGofStatistic, + ModeBetaGofStatistic, + MomentBasedBetaGofStatistic, + RatioBetaGofStatistic, + SkewnessKurtosisBetaGofStatistic, + WatsonBetaGofStatistic, +) from pysatl_criterion.statistics.common import ( ADStatistic, Chi2Statistic, @@ -76,21 +91,6 @@ ZhangWuANormalityGofStatistic, ZhangWuCNormalityGofStatistic, ) -from pysatl_criterion.statistics.beta import ( - AbstractBetaGofStatistic, - AndersonDarlingBetaGofStatistic, - Chi2PearsonBetaGofStatistic, - CrammerVonMisesBetaGofStatistic, - EntropyBetaGofStatistic, - KolmogorovSmirnovBetaGofStatistic, - KuiperBetaGofStatistic, - LillieforsTestBetaGofStatistic, - ModeBetaGofStatistic, - MomentBasedBetaGofStatistic, - RatioBetaGofStatistic, - SkewnessKurtosisBetaGofStatistic, - WatsonBetaGofStatistic, -) from pysatl_criterion.statistics.weibull import ( AbstractWeibullGofStatistic, AndersonDarlingWeibullGofStatistic, diff --git a/pysatl_criterion/statistics/beta.py b/pysatl_criterion/statistics/beta.py index b218b36..03fbe67 100644 --- a/pysatl_criterion/statistics/beta.py +++ b/pysatl_criterion/statistics/beta.py @@ -545,9 +545,7 @@ def execute_statistic(self, rvs, **kwargs): # Theoretical moments alpha_beta_sum = self.alpha + self.beta theoretical_mean = self.alpha / alpha_beta_sum - theoretical_var = (self.alpha * self.beta) / ( - 
alpha_beta_sum**2 * (alpha_beta_sum + 1) - ) + theoretical_var = (self.alpha * self.beta) / (alpha_beta_sum**2 * (alpha_beta_sum + 1)) # Test statistic based on standardized differences mean_diff = np.sqrt(n) * (sample_mean - theoretical_mean) / np.sqrt(theoretical_var) @@ -616,29 +614,30 @@ def execute_statistic(self, rvs, **kwargs): n = len(rvs) # Sample skewness and kurtosis - from scipy.stats import skew, kurtosis + from scipy.stats import kurtosis, skew + sample_skewness = skew(rvs, bias=False) sample_kurtosis = kurtosis(rvs, bias=False) # Theoretical skewness and kurtosis alpha_beta_sum = self.alpha + self.beta theoretical_skewness = ( - 2 * (self.beta - self.alpha) * np.sqrt(alpha_beta_sum + 1) / - ((alpha_beta_sum + 2) * np.sqrt(self.alpha * self.beta)) + 2 + * (self.beta - self.alpha) + * np.sqrt(alpha_beta_sum + 1) + / ((alpha_beta_sum + 2) * np.sqrt(self.alpha * self.beta)) ) - numerator = ( - 6 * ((self.alpha - self.beta)**2 * (alpha_beta_sum + 1) - - self.alpha * self.beta * (alpha_beta_sum + 2)) - ) - denominator = ( - self.alpha * self.beta * (alpha_beta_sum + 2) * (alpha_beta_sum + 3) + numerator = 6 * ( + (self.alpha - self.beta) ** 2 * (alpha_beta_sum + 1) + - self.alpha * self.beta * (alpha_beta_sum + 2) ) + denominator = self.alpha * self.beta * (alpha_beta_sum + 2) * (alpha_beta_sum + 3) theoretical_kurtosis = numerator / denominator # Test statistic (similar to Jarque-Bera) - skew_diff = (sample_skewness - theoretical_skewness)**2 - kurt_diff = (sample_kurtosis - theoretical_kurtosis)**2 + skew_diff = (sample_skewness - theoretical_skewness) ** 2 + kurt_diff = (sample_kurtosis - theoretical_kurtosis) ** 2 statistic = (n / 6) * skew_diff + (n / 24) * kurt_diff @@ -692,7 +691,9 @@ def execute_statistic(self, rvs, **kwargs): """ rvs = np.asarray(rvs) if np.any((rvs <= 0) | (rvs >= 1)): - raise ValueError("Beta distribution values must be in the open interval (0, 1) for this test") + raise ValueError( + "Beta distribution values must be in the 
open interval (0, 1) for this test" + ) n = len(rvs) @@ -778,7 +779,7 @@ def execute_statistic(self, rvs, **kwargs): raise ValueError("Beta distribution values must be in the interval [0, 1]") n = len(rvs) - m = kwargs.get('m', n // 4) + m = kwargs.get("m", n // 4) # Sort the data rvs_sorted = np.sort(rvs) @@ -798,10 +799,10 @@ def execute_statistic(self, rvs, **kwargs): from scipy.special import betaln, psi theoretical_entropy = ( - betaln(self.alpha, self.beta) - - (self.alpha - 1) * psi(self.alpha) - - (self.beta - 1) * psi(self.beta) + - (self.alpha + self.beta - 2) * psi(self.alpha + self.beta) + betaln(self.alpha, self.beta) + - (self.alpha - 1) * psi(self.alpha) + - (self.beta - 1) * psi(self.beta) + + (self.alpha + self.beta - 2) * psi(self.alpha + self.beta) ) # Test statistic diff --git a/tests/statistics/test_beta.py b/tests/statistics/test_beta.py index ed8c1d6..3ad2b61 100644 --- a/tests/statistics/test_beta.py +++ b/tests/statistics/test_beta.py @@ -31,29 +31,29 @@ def test_abstract_beta_criterion_parameters(): stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=3) assert stat.alpha == 2 assert stat.beta == 3 - + # Invalid alpha with pytest.raises(ValueError, match="alpha must be positive"): KolmogorovSmirnovBetaGofStatistic(alpha=-1, beta=2) - + with pytest.raises(ValueError, match="alpha must be positive"): KolmogorovSmirnovBetaGofStatistic(alpha=0, beta=2) - + # Invalid beta with pytest.raises(ValueError, match="beta must be positive"): KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=-1) - + with pytest.raises(ValueError, match="beta must be positive"): KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=0) class TestKolmogorovSmirnovBetaGofStatistic: """Tests for Kolmogorov-Smirnov test statistic.""" - + def test_code(self): """Test that the KS statistic returns correct code.""" assert "KS_BETA_GOODNESS_OF_FIT" == KolmogorovSmirnovBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -67,48 +67,48 @@ def 
test_ks_with_generated_data(self, alpha, beta, seed, n): """Test KS statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = KolmogorovSmirnovBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 - + # For data from the correct distribution, statistic should be relatively small # (though this is a probabilistic statement) assert statistic_value < 0.5 # Reasonable upper bound for good fit - + def test_ks_with_wrong_distribution(self): """Test KS statistic with data from wrong distribution.""" np.random.seed(42) # Generate from Beta(5, 2) but test against Beta(2, 5) data = scipy_stats.beta.rvs(5, 2, size=100) - + stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=5) statistic_value = stat.execute_statistic(data) - + # Should detect the mismatch assert statistic_value > 0.1 - + def test_ks_validation_errors(self): """Test that KS statistic validates input data.""" stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=3) - + # Data outside [0, 1] with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): stat.execute_statistic([0.5, 1.5, 0.3]) - + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): stat.execute_statistic([-0.1, 0.5, 0.8]) class TestAndersonDarlingBetaGofStatistic: """Tests for Anderson-Darling test statistic.""" - + def test_code(self): """Test that the AD statistic returns correct code.""" assert "AD_BETA_GOODNESS_OF_FIT" == AndersonDarlingBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -121,31 +121,31 @@ def test_ad_with_generated_data(self, alpha, beta, seed, n): """Test AD statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = AndersonDarlingBetaGofStatistic(alpha=alpha, 
beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be finite assert np.isfinite(statistic_value) - + # For data from the correct distribution, statistic should be reasonable assert statistic_value < 10 # Reasonable upper bound for good fit - + def test_ad_validation_errors(self): """Test that AD statistic validates input data.""" stat = AndersonDarlingBetaGofStatistic(alpha=2, beta=3) - + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): stat.execute_statistic([0.5, 1.5, 0.3]) class TestCrammerVonMisesBetaGofStatistic: """Tests for Cramér-von Mises test statistic.""" - + def test_code(self): """Test that the CVM statistic returns correct code.""" assert "CVM_BETA_GOODNESS_OF_FIT" == CrammerVonMisesBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -158,31 +158,31 @@ def test_cvm_with_generated_data(self, alpha, beta, seed, n): """Test CVM statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = CrammerVonMisesBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 - + # For data from the correct distribution, statistic should be small assert statistic_value < 1 # Reasonable upper bound for good fit - + def test_cvm_validation_errors(self): """Test that CVM statistic validates input data.""" stat = CrammerVonMisesBetaGofStatistic(alpha=2, beta=3) - + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): stat.execute_statistic([0.5, 1.5, 0.3]) class TestLillieforsTestBetaGofStatistic: """Tests for Lilliefors test statistic.""" - + def test_code(self): """Test that the Lilliefors statistic returns correct code.""" assert "LILLIE_BETA_GOODNESS_OF_FIT" == LillieforsTestBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", 
"n"), [ @@ -194,10 +194,10 @@ def test_lillie_with_generated_data(self, alpha, beta, seed, n): """Test Lilliefors statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = LillieforsTestBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert statistic_value < 0.5 @@ -205,11 +205,11 @@ def test_lillie_with_generated_data(self, alpha, beta, seed, n): class TestChi2PearsonBetaGofStatistic: """Tests for Pearson's Chi-squared test statistic.""" - + def test_code(self): """Test that the Chi-squared statistic returns correct code.""" assert "CHI2_PEARSON_BETA_GOODNESS_OF_FIT" == Chi2PearsonBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -221,10 +221,10 @@ def test_chi2_with_generated_data(self, alpha, beta, seed, n): """Test Chi-squared statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = Chi2PearsonBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert np.isfinite(statistic_value) @@ -232,11 +232,11 @@ def test_chi2_with_generated_data(self, alpha, beta, seed, n): class TestWatsonBetaGofStatistic: """Tests for Watson test statistic.""" - + def test_code(self): """Test that the Watson statistic returns correct code.""" assert "W_BETA_GOODNESS_OF_FIT" == WatsonBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -248,21 +248,21 @@ def test_watson_with_generated_data(self, alpha, beta, seed, n): """Test Watson statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = WatsonBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = 
stat.execute_statistic(data) - + # Statistic should be finite assert np.isfinite(statistic_value) class TestKuiperBetaGofStatistic: """Tests for Kuiper test statistic.""" - + def test_code(self): """Test that the Kuiper statistic returns correct code.""" assert "KUIPER_BETA_GOODNESS_OF_FIT" == KuiperBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -274,10 +274,10 @@ def test_kuiper_with_generated_data(self, alpha, beta, seed, n): """Test Kuiper statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = KuiperBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert statistic_value < 1 # Reasonable upper bound @@ -285,11 +285,11 @@ def test_kuiper_with_generated_data(self, alpha, beta, seed, n): class TestMomentBasedBetaGofStatistic: """Tests for Moment-based test statistic.""" - + def test_code(self): """Test that the Moment-based statistic returns correct code.""" assert "MB_BETA_GOODNESS_OF_FIT" == MomentBasedBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -301,34 +301,34 @@ def test_moment_with_generated_data(self, alpha, beta, seed, n): """Test Moment-based statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = MomentBasedBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert np.isfinite(statistic_value) - + def test_moment_expected_value(self): """Test that moment-based statistic is small for correct distribution.""" np.random.seed(42) # Generate large sample for stable moments data = scipy_stats.beta.rvs(2, 5, size=1000) - + stat = MomentBasedBetaGofStatistic(alpha=2, beta=5) statistic_value = 
stat.execute_statistic(data) - + # For correct distribution and large sample, should be relatively small assert statistic_value < 10 class TestSkewnessKurtosisBetaGofStatistic: """Tests for Skewness-Kurtosis test statistic.""" - + def test_code(self): """Test that the Skewness-Kurtosis statistic returns correct code.""" assert "SK_BETA_GOODNESS_OF_FIT" == SkewnessKurtosisBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -340,10 +340,10 @@ def test_sk_with_generated_data(self, alpha, beta, seed, n): """Test Skewness-Kurtosis statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = SkewnessKurtosisBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert np.isfinite(statistic_value) @@ -351,11 +351,11 @@ def test_sk_with_generated_data(self, alpha, beta, seed, n): class TestRatioBetaGofStatistic: """Tests for Ratio test statistic.""" - + def test_code(self): """Test that the Ratio statistic returns correct code.""" assert "RT_BETA_GOODNESS_OF_FIT" == RatioBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -369,30 +369,32 @@ def test_ratio_with_generated_data(self, alpha, beta, seed, n): # Add small noise to avoid exact 0 or 1 data = scipy_stats.beta.rvs(alpha, beta, size=n) data = np.clip(data, 0.001, 0.999) - + stat = RatioBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert np.isfinite(statistic_value) - + def test_ratio_validation_errors(self): """Test that Ratio statistic validates input data.""" stat = RatioBetaGofStatistic(alpha=2, beta=3) - + # Data with exact 0 or 1 (for log calculation) - with pytest.raises(ValueError, match="Beta distribution values must be in the open interval"): + with 
pytest.raises( + ValueError, match="Beta distribution values must be in the open interval" + ): stat.execute_statistic([0.5, 1.0, 0.3]) class TestEntropyBetaGofStatistic: """Tests for Entropy-based test statistic.""" - + def test_code(self): """Test that the Entropy statistic returns correct code.""" assert "ENT_BETA_GOODNESS_OF_FIT" == EntropyBetaGofStatistic.code() - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -404,10 +406,10 @@ def test_entropy_with_generated_data(self, alpha, beta, seed, n): """Test Entropy statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = EntropyBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert np.isfinite(statistic_value) @@ -415,26 +417,26 @@ def test_entropy_with_generated_data(self, alpha, beta, seed, n): class TestModeBetaGofStatistic: """Tests for Mode-based test statistic.""" - + def test_code(self): """Test that the Mode statistic returns correct code.""" assert "MODE_BETA_GOODNESS_OF_FIT" == ModeBetaGofStatistic.code() - + def test_mode_parameters_validation(self): """Test that Mode statistic validates alpha and beta > 1.""" # Valid parameters stat = ModeBetaGofStatistic(alpha=2, beta=2) assert stat.alpha == 2 assert stat.beta == 2 - + # Invalid alpha with pytest.raises(ValueError, match="alpha must be greater than 1"): ModeBetaGofStatistic(alpha=1, beta=2) - + # Invalid beta with pytest.raises(ValueError, match="beta must be greater than 1"): ModeBetaGofStatistic(alpha=2, beta=1) - + @pytest.mark.parametrize( ("alpha", "beta", "seed", "n"), [ @@ -446,10 +448,10 @@ def test_mode_with_generated_data(self, alpha, beta, seed, n): """Test Mode statistic with data generated from Beta distribution.""" np.random.seed(seed) data = scipy_stats.beta.rvs(alpha, beta, size=n) - + stat = ModeBetaGofStatistic(alpha=alpha, 
beta=beta) statistic_value = stat.execute_statistic(data) - + # Statistic should be non-negative assert statistic_value >= 0 assert np.isfinite(statistic_value) @@ -458,26 +460,26 @@ def test_mode_with_generated_data(self, alpha, beta, seed, n): # Integration tests class TestBetaIntegration: """Integration tests for Beta distribution statistics.""" - + def test_uniform_distribution(self): """Test that Beta(1,1) equals uniform distribution.""" np.random.seed(42) data = np.random.uniform(0, 1, 100) - + # Should fit well to Beta(1, 1) ks_stat = KolmogorovSmirnovBetaGofStatistic(alpha=1, beta=1) ks_value = ks_stat.execute_statistic(data) - + # Should be relatively small assert ks_value < 0.2 - + def test_all_statistics_run(self): """Test that all statistics can be computed without errors.""" np.random.seed(42) data = scipy_stats.beta.rvs(2, 5, size=100) # Clip for tests that need open interval data_clipped = np.clip(data, 0.001, 0.999) - + statistics = [ KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=5), AndersonDarlingBetaGofStatistic(alpha=2, beta=5), @@ -491,7 +493,7 @@ def test_all_statistics_run(self): EntropyBetaGofStatistic(alpha=2, beta=5), ModeBetaGofStatistic(alpha=2, beta=5), ] - + for stat in statistics: try: if isinstance(stat, RatioBetaGofStatistic): @@ -501,9 +503,8 @@ def test_all_statistics_run(self): assert np.isfinite(value), f"Statistic {stat.code()} returned non-finite value" except Exception as e: pytest.fail(f"Statistic {stat.code()} raised an exception: {e}") - + # Also test Ratio with clipped data ratio_stat = RatioBetaGofStatistic(alpha=2, beta=5) ratio_value = ratio_stat.execute_statistic(data_clipped) assert np.isfinite(ratio_value) - From ffe3af3d107c154213678da9601f44d3c9af72bf Mon Sep 17 00:00:00 2001 From: Dmitrii Deruzhinskii <91655905+terebellum@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:50:30 +0300 Subject: [PATCH 3/6] tests: Refactor tests for goodness-of-fit statistics to use precomputed expected values instead of generated 
data. --- tests/statistics/test_beta.py | 588 +++++++++++++++++++++++++++------- 1 file changed, 470 insertions(+), 118 deletions(-) diff --git a/tests/statistics/test_beta.py b/tests/statistics/test_beta.py index 3ad2b61..0866d7e 100644 --- a/tests/statistics/test_beta.py +++ b/tests/statistics/test_beta.py @@ -55,28 +55,101 @@ def test_code(self): assert "KS_BETA_GOODNESS_OF_FIT" == KolmogorovSmirnovBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 50), - (1, 1, 123, 100), # Uniform distribution - (0.5, 0.5, 456, 75), - (5, 2, 789, 200), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.1877694, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.2081872, + ), + ( + [ + 0.6994383163, + 0.5174900183, + 0.5925767197, + 0.5611698399, + 0.9395027265, + 0.6820294051, + 0.4379703453, + 0.5303633637, + ], + 0.5, + 0.5, + 0.4604087, + ), + ( + [ + 0.7482953444, + 0.8762706351, + 0.4603376651, + 0.7471648301, + 0.8904978771, + 0.7206587486, + 0.8250535532, + 0.7494859221, + 0.8777684524, + 0.9167142296, + 0.9568773740, + ], + 5, + 2, + 0.3749592, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2160485, + ), ], ) - def test_ks_with_generated_data(self, alpha, beta, seed, n): - """Test KS statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_ks_with_parametrized_data(self, data, alpha, beta, result): + """Test KS statistic with precomputed expected 
values.""" stat = KolmogorovSmirnovBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be non-negative - assert statistic_value >= 0 - - # For data from the correct distribution, statistic should be relatively small - # (though this is a probabilistic statement) - assert statistic_value < 0.5 # Reasonable upper bound for good fit + assert result == pytest.approx(statistic_value, 0.00001) def test_ks_with_wrong_distribution(self): """Test KS statistic with data from wrong distribution.""" @@ -110,26 +183,68 @@ def test_code(self): assert "AD_BETA_GOODNESS_OF_FIT" == AndersonDarlingBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 50), - (1, 1, 123, 100), - (3, 3, 456, 75), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.3746031, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.7490749, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.3998264, + ), ], ) - def test_ad_with_generated_data(self, alpha, beta, seed, n): - """Test AD statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_ad_with_parametrized_data(self, data, alpha, beta, result): + """Test AD statistic with precomputed expected values.""" stat = AndersonDarlingBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be finite - assert np.isfinite(statistic_value) - - # For 
data from the correct distribution, statistic should be reasonable - assert statistic_value < 10 # Reasonable upper bound for good fit + assert result == pytest.approx(statistic_value, 0.00001) def test_ad_validation_errors(self): """Test that AD statistic validates input data.""" @@ -147,26 +262,68 @@ def test_code(self): assert "CVM_BETA_GOODNESS_OF_FIT" == CrammerVonMisesBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 50), - (1, 1, 123, 100), - (3, 3, 456, 75), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.0565441, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.1208704, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.0692098, + ), ], ) - def test_cvm_with_generated_data(self, alpha, beta, seed, n): - """Test CVM statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_cvm_with_parametrized_data(self, data, alpha, beta, result): + """Test CVM statistic with precomputed expected values.""" stat = CrammerVonMisesBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be non-negative - assert statistic_value >= 0 - - # For data from the correct distribution, statistic should be small - assert statistic_value < 1 # Reasonable upper bound for good fit + assert result == pytest.approx(statistic_value, 0.00001) def test_cvm_validation_errors(self): """Test that CVM statistic validates 
input data.""" @@ -184,23 +341,49 @@ def test_code(self): assert "LILLIE_BETA_GOODNESS_OF_FIT" == LillieforsTestBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 50), - (3, 3, 123, 100), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.1877694, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2160485, + ), ], ) - def test_lillie_with_generated_data(self, alpha, beta, seed, n): - """Test Lilliefors statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_lillie_with_parametrized_data(self, data, alpha, beta, result): + """Test Lilliefors statistic with precomputed expected values.""" stat = LillieforsTestBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be non-negative - assert statistic_value >= 0 - assert statistic_value < 0.5 + assert result == pytest.approx(statistic_value, 0.00001) class TestChi2PearsonBetaGofStatistic: @@ -238,22 +421,68 @@ def test_code(self): assert "W_BETA_GOODNESS_OF_FIT" == WatsonBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 50), - (3, 3, 123, 100), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.0302649, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 
0.4333839830, + ], + 1, + 1, + 0.1096339, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.0430198, + ), ], ) - def test_watson_with_generated_data(self, alpha, beta, seed, n): - """Test Watson statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_watson_with_parametrized_data(self, data, alpha, beta, result): + """Test Watson statistic with precomputed expected values.""" stat = WatsonBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be finite - assert np.isfinite(statistic_value) + assert result == pytest.approx(statistic_value, 0.00001) class TestKuiperBetaGofStatistic: @@ -264,23 +493,68 @@ def test_code(self): assert "KUIPER_BETA_GOODNESS_OF_FIT" == KuiperBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 50), - (3, 3, 123, 100), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.2567761, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.3450400, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2919412, + ), ], ) - def test_kuiper_with_generated_data(self, alpha, beta, seed, n): - """Test Kuiper statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def 
test_kuiper_with_parametrized_data(self, data, alpha, beta, result): + """Test Kuiper statistic with precomputed expected values.""" stat = KuiperBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be non-negative - assert statistic_value >= 0 - assert statistic_value < 1 # Reasonable upper bound + assert result == pytest.approx(statistic_value, 0.00001) class TestMomentBasedBetaGofStatistic: @@ -291,35 +565,68 @@ def test_code(self): assert "MB_BETA_GOODNESS_OF_FIT" == MomentBasedBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 100), - (3, 3, 123, 200), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.2349020, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.3372358, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2891104, + ), ], ) - def test_moment_with_generated_data(self, alpha, beta, seed, n): - """Test Moment-based statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_moment_with_parametrized_data(self, data, alpha, beta, result): + """Test Moment-based statistic with precomputed expected values.""" stat = MomentBasedBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be non-negative - assert statistic_value >= 0 - assert np.isfinite(statistic_value) - - def test_moment_expected_value(self): - """Test that moment-based 
statistic is small for correct distribution.""" - np.random.seed(42) - # Generate large sample for stable moments - data = scipy_stats.beta.rvs(2, 5, size=1000) - - stat = MomentBasedBetaGofStatistic(alpha=2, beta=5) - statistic_value = stat.execute_statistic(data) - - # For correct distribution and large sample, should be relatively small - assert statistic_value < 10 + assert result == pytest.approx(statistic_value, 0.00001) class TestSkewnessKurtosisBetaGofStatistic: @@ -330,23 +637,68 @@ def test_code(self): assert "SK_BETA_GOODNESS_OF_FIT" == SkewnessKurtosisBetaGofStatistic.code() @pytest.mark.parametrize( - ("alpha", "beta", "seed", "n"), + ("data", "alpha", "beta", "result"), [ - (2, 5, 42, 100), - (3, 3, 123, 200), + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.7277307, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.2648237, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2303306, + ), ], ) - def test_sk_with_generated_data(self, alpha, beta, seed, n): - """Test Skewness-Kurtosis statistic with data generated from Beta distribution.""" - np.random.seed(seed) - data = scipy_stats.beta.rvs(alpha, beta, size=n) - + def test_sk_with_parametrized_data(self, data, alpha, beta, result): + """Test Skewness-Kurtosis statistic with precomputed expected values.""" stat = SkewnessKurtosisBetaGofStatistic(alpha=alpha, beta=beta) statistic_value = stat.execute_statistic(data) - - # Statistic should be non-negative - assert statistic_value >= 0 - assert np.isfinite(statistic_value) + assert result == 
pytest.approx(statistic_value, 0.00001) class TestRatioBetaGofStatistic: From 3ef5131e06152609bad27b149ecfcd880e6a2ba1 Mon Sep 17 00:00:00 2001 From: Aleksei Tokarev Date: Fri, 5 Dec 2025 14:57:13 +0300 Subject: [PATCH 4/6] tests: Add missing precomputed value tests for remaining Beta statistics - Add regression tests for Chi2, Ratio, Entropy, and Mode statistics - Ensure all Beta distribution statistics now have accuracy verification --- tests/statistics/test_beta.py | 237 ++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) diff --git a/tests/statistics/test_beta.py b/tests/statistics/test_beta.py index 0866d7e..39589f4 100644 --- a/tests/statistics/test_beta.py +++ b/tests/statistics/test_beta.py @@ -412,6 +412,70 @@ def test_chi2_with_generated_data(self, alpha, beta, seed, n): assert statistic_value >= 0 assert np.isfinite(statistic_value) + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 1.30301816, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 3.33333333, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 1.13560740, + ), + ], + ) + def test_chi2_with_parametrized_data(self, data, alpha, beta, result): + """Test Chi-squared statistic with precomputed expected values.""" + stat = Chi2PearsonBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + class TestWatsonBetaGofStatistic: """Tests for Watson test statistic.""" @@ -729,6 +793,70 @@ 
def test_ratio_with_generated_data(self, alpha, beta, seed, n): assert statistic_value >= 0 assert np.isfinite(statistic_value) + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.20054649, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.62062110, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.02561022, + ), + ], + ) + def test_ratio_with_parametrized_data(self, data, alpha, beta, result): + """Test Ratio statistic with precomputed expected values.""" + stat = RatioBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + def test_ratio_validation_errors(self): """Test that Ratio statistic validates input data.""" stat = RatioBetaGofStatistic(alpha=2, beta=3) @@ -766,6 +894,70 @@ def test_entropy_with_generated_data(self, alpha, beta, seed, n): assert statistic_value >= 0 assert np.isfinite(statistic_value) + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 1.46457551, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 1.64485770, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 
0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.86392069, + ), + ], + ) + def test_entropy_with_parametrized_data(self, data, alpha, beta, result): + """Test Entropy statistic with precomputed expected values.""" + stat = EntropyBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + class TestModeBetaGofStatistic: """Tests for Mode-based test statistic.""" @@ -808,6 +1000,51 @@ def test_mode_with_generated_data(self, alpha, beta, seed, n): assert statistic_value >= 0 assert np.isfinite(statistic_value) + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.03766130, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.39711215, + ), + ], + ) + def test_mode_with_parametrized_data(self, data, alpha, beta, result): + """Test Mode statistic with precomputed expected values.""" + stat = ModeBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + # Integration tests class TestBetaIntegration: From 2dba1ce1a531f7907989d61ba1e5279c9bdf4549 Mon Sep 17 00:00:00 2001 From: TrueKotenka Date: Sat, 13 Dec 2025 12:40:43 +0300 Subject: [PATCH 5/6] fix repeats --- pysatl_criterion/statistics/beta.py | 65 ++++++++++++----------------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/pysatl_criterion/statistics/beta.py b/pysatl_criterion/statistics/beta.py index 03fbe67..f9fe637 100644 --- a/pysatl_criterion/statistics/beta.py +++ 
b/pysatl_criterion/statistics/beta.py @@ -37,6 +37,21 @@ def __init__(self, alpha=1, beta=1): self.alpha = alpha self.beta = beta + @staticmethod + def _validate_rvs(rvs, open_interval=False): + rvs = np.asarray(rvs) + if open_interval: + # All values strictly between 0 and 1 + if np.any((rvs <= 0) | (rvs >= 1)): + raise ValueError( + "Beta distribution values must be in the open interval (0, 1) for this test" + ) + else: + # All values in [0, 1] + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + return rvs + @staticmethod @override def code(): @@ -96,9 +111,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) @@ -151,9 +164,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) n = len(rvs_sorted) @@ -216,9 +227,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) @@ -271,9 +280,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) 
cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) @@ -335,9 +342,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) n = len(rvs_sorted) @@ -400,9 +405,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) n = len(rvs_sorted) @@ -464,9 +467,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value (D+ + D-) """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) rvs_sorted = np.sort(rvs) n = len(rvs_sorted) @@ -532,9 +533,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) n = len(rvs) @@ -607,9 +606,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) n = len(rvs) @@ -689,11 +686,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs <= 0) | (rvs >= 1)): - raise ValueError( - "Beta distribution values must be in the open interval (0, 1) for this test" - ) + rvs = self._validate_rvs(rvs, open_interval=True) n = len(rvs) @@ -774,9 
+767,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) n = len(rvs) m = kwargs.get("m", n // 4) @@ -865,9 +856,7 @@ def execute_statistic(self, rvs, **kwargs): statistic : float The test statistic value """ - rvs = np.asarray(rvs) - if np.any((rvs < 0) | (rvs > 1)): - raise ValueError("Beta distribution values must be in the interval [0, 1]") + rvs = self._validate_rvs(rvs) n = len(rvs) From 217a8f323437eb6da01a95cb56d195be2ad95450 Mon Sep 17 00:00:00 2001 From: Alexey Mironov Date: Mon, 15 Dec 2025 13:59:33 +0300 Subject: [PATCH 6/6] Add short_codes --- pysatl_criterion/statistics/beta.py | 96 +++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/pysatl_criterion/statistics/beta.py b/pysatl_criterion/statistics/beta.py index f9fe637..1e6696c 100644 --- a/pysatl_criterion/statistics/beta.py +++ b/pysatl_criterion/statistics/beta.py @@ -91,10 +91,16 @@ def __init__(self, alpha=1, beta=1, alternative="two-sided", mode="auto"): AbstractBetaGofStatistic.__init__(self, alpha, beta) KSStatistic.__init__(self, alternative, mode) + @staticmethod + @override + def short_code(): + return "KS" + @staticmethod @override def code(): - return f"KS_{AbstractBetaGofStatistic.code()}" + short_code = KolmogorovSmirnovBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -144,10 +150,16 @@ class AndersonDarlingBetaGofStatistic(AbstractBetaGofStatistic, ADStatistic): The Annals of Mathematical Statistics, 23(2), 193-212. 
""" + @staticmethod + @override + def short_code(): + return "AD" + @staticmethod @override def code(): - return f"AD_{AbstractBetaGofStatistic.code()}" + short_code = AndersonDarlingBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -207,10 +219,16 @@ class CrammerVonMisesBetaGofStatistic(AbstractBetaGofStatistic, CrammerVonMisesS Julius Springer. """ + @staticmethod + @override + def short_code(): + return "CVM" + @staticmethod @override def code(): - return f"CVM_{AbstractBetaGofStatistic.code()}" + short_code = CrammerVonMisesBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -260,10 +278,16 @@ class LillieforsTestBetaGofStatistic(AbstractBetaGofStatistic, LillieforsTest): Association, 62(318), 399-402. """ + @staticmethod + @override + def short_code(): + return "LILLIE" + @staticmethod @override def code(): - return f"LILLIE_{AbstractBetaGofStatistic.code()}" + short_code = LillieforsTestBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -322,10 +346,16 @@ def __init__(self, alpha=1, beta=1, lambda_=1): Chi2Statistic.__init__(self) self.lambda_ = lambda_ + @staticmethod + @override + def short_code(): + return "CHI2_PEARSON" + @staticmethod @override def code(): - return f"CHI2_PEARSON_{AbstractBetaGofStatistic.code()}" + short_code = Chi2PearsonBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -385,10 +415,16 @@ class WatsonBetaGofStatistic(AbstractBetaGofStatistic): Biometrika, 48(1/2), 109-114. 
""" + @staticmethod + @override + def short_code(): + return "W" + @staticmethod @override def code(): - return f"W_{AbstractBetaGofStatistic.code()}" + short_code = WatsonBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -447,10 +483,16 @@ class KuiperBetaGofStatistic(AbstractBetaGofStatistic): Indagationes Mathematicae, 63, 38-47. """ + @staticmethod + @override + def short_code(): + return "KUIPER" + @staticmethod @override def code(): - return f"KUIPER_{AbstractBetaGofStatistic.code()}" + short_code = KuiperBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -513,10 +555,16 @@ class MomentBasedBetaGofStatistic(AbstractBetaGofStatistic): John Wiley & Sons. """ + @staticmethod + @override + def short_code(): + return "MB" + @staticmethod @override def code(): - return f"MB_{AbstractBetaGofStatistic.code()}" + short_code = MomentBasedBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -586,10 +634,16 @@ class SkewnessKurtosisBetaGofStatistic(AbstractBetaGofStatistic): Economics Letters, 6(3), 255-259. """ + @staticmethod + @override + def short_code(): + return "SK" + @staticmethod @override def code(): - return f"SK_{AbstractBetaGofStatistic.code()}" + short_code = SkewnessKurtosisBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -666,10 +720,16 @@ class RatioBetaGofStatistic(AbstractBetaGofStatistic): equations with applications to stochastic models. John Wiley & Sons. 
""" + @staticmethod + @override + def short_code(): + return "RT" + @staticmethod @override def code(): - return f"RT_{AbstractBetaGofStatistic.code()}" + short_code = RatioBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -745,10 +805,16 @@ class EntropyBetaGofStatistic(AbstractBetaGofStatistic): 38(1), 54-59. """ + @staticmethod + @override + def short_code(): + return "ENT" + @staticmethod @override def code(): - return f"ENT_{AbstractBetaGofStatistic.code()}" + short_code = EntropyBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs): @@ -834,10 +900,16 @@ def __init__(self, alpha=2, beta=2): raise ValueError("beta must be greater than 1 for mode to be well-defined") super().__init__(alpha, beta) + @staticmethod + @override + def short_code(): + return "MODE" + @staticmethod @override def code(): - return f"MODE_{AbstractBetaGofStatistic.code()}" + short_code = ModeBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" @override def execute_statistic(self, rvs, **kwargs):