diff --git a/pysatl_criterion/statistics/__init__.py b/pysatl_criterion/statistics/__init__.py index f565e4b..31eaa6f 100644 --- a/pysatl_criterion/statistics/__init__.py +++ b/pysatl_criterion/statistics/__init__.py @@ -1,3 +1,18 @@ +from pysatl_criterion.statistics.beta import ( + AbstractBetaGofStatistic, + AndersonDarlingBetaGofStatistic, + Chi2PearsonBetaGofStatistic, + CrammerVonMisesBetaGofStatistic, + EntropyBetaGofStatistic, + KolmogorovSmirnovBetaGofStatistic, + KuiperBetaGofStatistic, + LillieforsTestBetaGofStatistic, + ModeBetaGofStatistic, + MomentBasedBetaGofStatistic, + RatioBetaGofStatistic, + SkewnessKurtosisBetaGofStatistic, + WatsonBetaGofStatistic, +) from pysatl_criterion.statistics.common import ( ADStatistic, Chi2Statistic, @@ -139,26 +154,45 @@ "ADStatistic", "Chi2Statistic", "CrammerVonMisesStatistic", - "KolmogorovSmirnovWeibullGofStatistic", - "SBWeibullGofStatistic", - "SbWeibullGofStatistic", - "RSBWeibullGofStatistic", + "KSStatistic", "LillieforsTest", "MinToshiyukiStatistic", - "WPPWeibullGofStatistic", - "NormalizeSpaceWeibullGofStatistic", + # Beta distribution statistics + "AbstractBetaGofStatistic", + "AndersonDarlingBetaGofStatistic", + "Chi2PearsonBetaGofStatistic", + "CrammerVonMisesBetaGofStatistic", + "EntropyBetaGofStatistic", + "KolmogorovSmirnovBetaGofStatistic", + "KuiperBetaGofStatistic", + "LillieforsTestBetaGofStatistic", + "ModeBetaGofStatistic", + "MomentBasedBetaGofStatistic", + "RatioBetaGofStatistic", + "SkewnessKurtosisBetaGofStatistic", + "WatsonBetaGofStatistic", + # Weibull distribution statistics "AbstractWeibullGofStatistic", - "TikuSinghWeibullGofStatistic", - "ST2WeibullGofStatistic", - "ST1WeibullGofStatistic", - "SPPWeibullGofStatistic", - "REJGWeibullGofStatistic", - "OKWeibullGofStatistic", - "MSFWeibullGofStatistic", - "MinToshiyukiWeibullGofStatistic", + "AndersonDarlingWeibullGofStatistic", + "Chi2PearsonWeibullGofStatistic", "CrammerVonMisesWeibullGofStatistic", + "KolmogorovSmirnovWeibullGofStatistic", "LillieforsWeibullGofStatistic", "LOSWeibullGofStatistic", + "MinToshiyukiWeibullGofStatistic", + "MSFWeibullGofStatistic", + "NormalizeSpaceWeibullGofStatistic", + "OKWeibullGofStatistic", + "REJGWeibullGofStatistic", + "RSBWeibullGofStatistic", + "SBWeibullGofStatistic", + "SbWeibullGofStatistic", + "SPPWeibullGofStatistic", + "ST1WeibullGofStatistic", + "ST2WeibullGofStatistic", + "TikuSinghWeibullGofStatistic", + "WPPWeibullGofStatistic", + # Exponential distribution statistics "Chi2PearsonWeibullGofStatistic", "AndersonDarlingWeibullGofStatistic", "AbstractGammaGofStatistic", diff --git a/pysatl_criterion/statistics/beta.py b/pysatl_criterion/statistics/beta.py new file mode 100644 index 0000000..1e6696c --- /dev/null +++ b/pysatl_criterion/statistics/beta.py @@ -0,0 +1,949 @@ +from abc import ABC + +import numpy as np +import scipy.stats as scipy_stats +from typing_extensions import override + +from pysatl_criterion.statistics.common import ( + ADStatistic, + Chi2Statistic, + CrammerVonMisesStatistic, + KSStatistic, + LillieforsTest, +) +from pysatl_criterion.statistics.goodness_of_fit import AbstractGoodnessOfFitStatistic + + +class AbstractBetaGofStatistic(AbstractGoodnessOfFitStatistic, ABC): + """ + Abstract base class for Beta distribution goodness-of-fit statistics. + + The Beta distribution is a continuous probability distribution defined on the interval [0, 1] + parameterized by two positive shape parameters, denoted by α (alpha) and β (beta). + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + """ + + def __init__(self, alpha=1, beta=1): + if alpha <= 0: + raise ValueError("alpha must be positive") + if beta <= 0: + raise ValueError("beta must be positive") + self.alpha = alpha + self.beta = beta + + @staticmethod + def _validate_rvs(rvs, open_interval=False): + rvs = np.asarray(rvs) + if open_interval: + # All values strictly between 0 and 1 + if np.any((rvs <= 0) | (rvs >= 1)): + raise ValueError( + "Beta distribution values must be in the open interval (0, 1) for this test" + ) + else: + # All values in [0, 1] + if np.any((rvs < 0) | (rvs > 1)): + raise ValueError("Beta distribution values must be in the interval [0, 1]") + return rvs + + @staticmethod + @override + def code(): + return f"BETA_{AbstractGoodnessOfFitStatistic.code()}" + + +class KolmogorovSmirnovBetaGofStatistic(AbstractBetaGofStatistic, KSStatistic): + """ + Kolmogorov-Smirnov test statistic for Beta distribution. + + The Kolmogorov-Smirnov test compares the empirical distribution function + with the theoretical Beta distribution function. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + alternative : {'two-sided', 'less', 'greater'}, optional + Defines the alternative hypothesis. Default is 'two-sided'. + mode : {'auto', 'exact', 'approx', 'asymp'}, optional + Defines the distribution used for calculating the p-value. Default is 'auto'. + + Returns + ------- + statistic : float + The Kolmogorov-Smirnov test statistic + + References + ---------- + .. [1] Massey, F. J. (1951). The Kolmogorov-Smirnov test for goodness of fit. + Journal of the American Statistical Association, 46(253), 68-78. + """ + + def __init__(self, alpha=1, beta=1, alternative="two-sided", mode="auto"): + AbstractBetaGofStatistic.__init__(self, alpha, beta) + KSStatistic.__init__(self, alternative, mode) + + @staticmethod + @override + def short_code(): + return "KS" + + @staticmethod + @override + def code(): + short_code = KolmogorovSmirnovBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Kolmogorov-Smirnov test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + return KSStatistic.execute_statistic(self, rvs_sorted, cdf_vals) + + +class AndersonDarlingBetaGofStatistic(AbstractBetaGofStatistic, ADStatistic): + """ + Anderson-Darling test statistic for Beta distribution. + + The Anderson-Darling test is a modification of the Kolmogorov-Smirnov test + that gives more weight to the tails of the distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Anderson-Darling test statistic + + References + ---------- + .. [1] Anderson, T. W., & Darling, D. A. (1952). Asymptotic theory of certain + "goodness of fit" criteria based on stochastic processes. + The Annals of Mathematical Statistics, 23(2), 193-212. + """ + + @staticmethod + @override + def short_code(): + return "AD" + + @staticmethod + @override + def code(): + short_code = AndersonDarlingBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Anderson-Darling test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + + # Compute log CDF and log SF (survival function) + logcdf = scipy_stats.beta.logcdf(rvs_sorted, self.alpha, self.beta) + logsf = scipy_stats.beta.logsf(rvs_sorted, self.alpha, self.beta) + + i = np.arange(1, n + 1) + A2 = -n - np.sum((2 * i - 1.0) / n * (logcdf + logsf[::-1])) + + return A2 + + +class CrammerVonMisesBetaGofStatistic(AbstractBetaGofStatistic, CrammerVonMisesStatistic): + """ + Cramér-von Mises test statistic for Beta distribution. + + The Cramér-von Mises test is a goodness-of-fit test that measures the + discrepancy between the empirical distribution function and the theoretical + cumulative distribution function. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Cramér-von Mises test statistic + + References + ---------- + .. [1] Cramér, H. (1928). On the composition of elementary errors. + Scandinavian Actuarial Journal, 1928(1), 13-74. + .. [2] von Mises, R. (1928). Wahrscheinlichkeit, Statistik und Wahrheit. + Julius Springer. + """ + + @staticmethod + @override + def short_code(): + return "CVM" + + @staticmethod + @override + def code(): + short_code = CrammerVonMisesBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Cramér-von Mises test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + return CrammerVonMisesStatistic.execute_statistic(self, rvs_sorted, cdf_vals) + + +class LillieforsTestBetaGofStatistic(AbstractBetaGofStatistic, LillieforsTest): + """ + Lilliefors test statistic for Beta distribution. + + The Lilliefors test is a modification of the Kolmogorov-Smirnov test for + the case when parameters are estimated from the data. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Lilliefors test statistic + + References + ---------- + .. [1] Lilliefors, H. W. (1967). On the Kolmogorov-Smirnov test for normality + with mean and variance unknown. Journal of the American Statistical + Association, 62(318), 399-402. + """ + + @staticmethod + @override + def short_code(): + return "LILLIE" + + @staticmethod + @override + def code(): + short_code = LillieforsTestBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Lilliefors test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + return LillieforsTest.execute_statistic(self, rvs_sorted, cdf_vals) + + +class Chi2PearsonBetaGofStatistic(AbstractBetaGofStatistic, Chi2Statistic): + """ + Pearson's Chi-squared test statistic for Beta distribution. + + The chi-squared test compares observed frequencies with expected frequencies + based on the Beta distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + lambda_ : float, default=1 + Power divergence parameter. Lambda=1 gives Pearson's chi-squared statistic. + + Returns + ------- + statistic : float + The chi-squared test statistic + + References + ---------- + .. [1] Pearson, K. (1900). On the criterion that a given system of deviations from + the probable in the case of a correlated system of variables is such that + it can be reasonably supposed to have arisen from random sampling. + The London, Edinburgh, and Dublin Philosophical Magazine and Journal of + Science, 50(302), 157-175. + """ + + def __init__(self, alpha=1, beta=1, lambda_=1): + AbstractBetaGofStatistic.__init__(self, alpha, beta) + Chi2Statistic.__init__(self) + self.lambda_ = lambda_ + + @staticmethod + @override + def short_code(): + return "CHI2_PEARSON" + + @staticmethod + @override + def code(): + short_code = Chi2PearsonBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Pearson's chi-squared test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + + # Number of bins using Sturges' rule + num_bins = int(np.ceil(np.sqrt(n))) + + # Create histogram + observed, bin_edges = np.histogram(rvs_sorted, bins=num_bins, range=(0, 1)) + + # Calculate expected frequencies + expected_cdf = scipy_stats.beta.cdf(bin_edges, self.alpha, self.beta) + expected = np.diff(expected_cdf) * n + + return Chi2Statistic.execute_statistic(self, observed, expected, self.lambda_) + + +class WatsonBetaGofStatistic(AbstractBetaGofStatistic): + """ + Watson test statistic for Beta distribution. + + The Watson test is a modification of the Cramér-von Mises test that is + invariant under location changes. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Watson test statistic + + References + ---------- + .. [1] Watson, G. S. (1961). Goodness-of-fit tests on a circle. + Biometrika, 48(1/2), 109-114. + """ + + @staticmethod + @override + def short_code(): + return "W" + + @staticmethod + @override + def code(): + short_code = WatsonBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Watson test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + + # Cramér-von Mises statistic + u = (2 * np.arange(1, n + 1) - 1) / (2 * n) + cvm = 1 / (12 * n) + np.sum((u - cdf_vals) ** 2) + + # Watson correction + correction_term = n * (np.mean(cdf_vals) - 0.5) ** 2 + watson_statistic = cvm - correction_term + + return watson_statistic + + +class KuiperBetaGofStatistic(AbstractBetaGofStatistic): + """ + Kuiper test statistic for Beta distribution. + + The Kuiper test is a variant of the Kolmogorov-Smirnov test that is + more sensitive to deviations in the tails of the distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The Kuiper test statistic (D+ + D-) + + References + ---------- + .. [1] Kuiper, N. H. (1960). Tests concerning random points on a circle. + Indagationes Mathematicae, 63, 38-47. + """ + + @staticmethod + @override + def short_code(): + return "KUIPER" + + @staticmethod + @override + def code(): + short_code = KuiperBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the Kuiper test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value (D+ + D-) + """ + rvs = self._validate_rvs(rvs) + + rvs_sorted = np.sort(rvs) + n = len(rvs_sorted) + cdf_vals = scipy_stats.beta.cdf(rvs_sorted, self.alpha, self.beta) + + # D+ = max(i/n - F(x_i)) + d_plus = np.max(np.arange(1, n + 1) / n - cdf_vals) + + # D- = max(F(x_i) - (i-1)/n) + d_minus = np.max(cdf_vals - np.arange(0, n) / n) + + # Kuiper statistic is D+ + D- + return d_plus + d_minus + + +class MomentBasedBetaGofStatistic(AbstractBetaGofStatistic): + """ + Moment-based test statistic for Beta distribution. + + This test compares the sample moments with the theoretical moments of + the Beta distribution. It uses the first two moments (mean and variance). + + For Beta(α, β): + - Mean: μ = α/(α+β) + - Variance: σ² = αβ/[(α+β)²(α+β+1)] + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The moment-based test statistic + + References + ---------- + .. [1] Johnson, N. L., Kotz, S., & Balakrishnan, N. (1995). + Continuous univariate distributions, volume 2 (Vol. 289). + John Wiley & Sons. + """ + + @staticmethod + @override + def short_code(): + return "MB" + + @staticmethod + @override + def code(): + short_code = MomentBasedBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the moment-based test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + n = len(rvs) + + # Sample moments + sample_mean = np.mean(rvs) + sample_var = np.var(rvs, ddof=1) + + # Theoretical moments + alpha_beta_sum = self.alpha + self.beta + theoretical_mean = self.alpha / alpha_beta_sum + theoretical_var = (self.alpha * self.beta) / (alpha_beta_sum**2 * (alpha_beta_sum + 1)) + + # Test statistic based on standardized differences + mean_diff = np.sqrt(n) * (sample_mean - theoretical_mean) / np.sqrt(theoretical_var) + var_diff = np.sqrt(n) * (sample_var - theoretical_var) / np.sqrt(theoretical_var) + + # Combined statistic + statistic = mean_diff**2 + var_diff**2 + + return statistic + + +class SkewnessKurtosisBetaGofStatistic(AbstractBetaGofStatistic): + """ + Skewness-Kurtosis test statistic for Beta distribution. + + This test compares the sample skewness and kurtosis with the theoretical + values of the Beta distribution. + + For Beta(α, β): + - Skewness: γ₁ = 2(β-α)√(α+β+1) / [(α+β+2)√(αβ)] + - Kurtosis: γ₂ = 6[(α-β)²(α+β+1) - αβ(α+β+2)] / [αβ(α+β+2)(α+β+3)] + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The skewness-kurtosis test statistic + + References + ---------- + .. [1] Jarque, C. M., & Bera, A. K. (1980). Efficient tests for normality, + homoscedasticity and serial independence of regression residuals. + Economics Letters, 6(3), 255-259. + """ + + @staticmethod + @override + def short_code(): + return "SK" + + @staticmethod + @override + def code(): + short_code = SkewnessKurtosisBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the skewness-kurtosis test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + n = len(rvs) + + # Sample skewness and kurtosis + from scipy.stats import kurtosis, skew + + sample_skewness = skew(rvs, bias=False) + sample_kurtosis = kurtosis(rvs, bias=False) + + # Theoretical skewness and kurtosis + alpha_beta_sum = self.alpha + self.beta + theoretical_skewness = ( + 2 + * (self.beta - self.alpha) + * np.sqrt(alpha_beta_sum + 1) + / ((alpha_beta_sum + 2) * np.sqrt(self.alpha * self.beta)) + ) + + numerator = 6 * ( + (self.alpha - self.beta) ** 2 * (alpha_beta_sum + 1) + - self.alpha * self.beta * (alpha_beta_sum + 2) + ) + denominator = self.alpha * self.beta * (alpha_beta_sum + 2) * (alpha_beta_sum + 3) + theoretical_kurtosis = numerator / denominator + + # Test statistic (similar to Jarque-Bera) + skew_diff = (sample_skewness - theoretical_skewness) ** 2 + kurt_diff = (sample_kurtosis - theoretical_kurtosis) ** 2 + + statistic = (n / 6) * skew_diff + (n / 24) * kurt_diff + + return statistic + + +class RatioBetaGofStatistic(AbstractBetaGofStatistic): + """ + Ratio test statistic for Beta distribution. + + This test is based on the ratio of geometric mean to arithmetic mean, + which has a known relationship for the Beta distribution. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The ratio test statistic + + References + ---------- + .. [1] Rao, C. R., & Shanbhag, D. N. (1994). Choquet-Deny type functional + equations with applications to stochastic models. John Wiley & Sons. + """ + + @staticmethod + @override + def short_code(): + return "RT" + + @staticmethod + @override + def code(): + short_code = RatioBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the ratio test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs, open_interval=True) + + n = len(rvs) + + # Sample statistics + arithmetic_mean = np.mean(rvs) + geometric_mean = np.exp(np.mean(np.log(rvs))) + + # Avoid division by zero + if arithmetic_mean == 0: + raise ValueError("Arithmetic mean is zero, cannot compute ratio") + + sample_ratio = geometric_mean / arithmetic_mean + + # Theoretical ratio for Beta distribution + # E[X] = α/(α+β) + # E[log X] = ψ(α) - ψ(α+β) where ψ is digamma function + from scipy.special import psi + + theoretical_mean = self.alpha / (self.alpha + self.beta) + theoretical_log_mean = psi(self.alpha) - psi(self.alpha + self.beta) + theoretical_ratio = np.exp(theoretical_log_mean) / theoretical_mean + + # Test statistic + statistic = np.sqrt(n) * np.abs(sample_ratio - theoretical_ratio) + + return statistic + + +class EntropyBetaGofStatistic(AbstractBetaGofStatistic): + """ + Entropy-based test statistic for Beta distribution. + + This test compares the sample entropy (estimated using kernel density) + with the theoretical entropy of the Beta distribution. + + The differential entropy of Beta(α, β) is: + H = ln(B(α,β)) - (α-1)ψ(α) - (β-1)ψ(β) + (α+β-2)ψ(α+β) + where B is the Beta function and ψ is the digamma function. + + Parameters + ---------- + alpha : float, default=1 + Shape parameter α > 0 + beta : float, default=1 + Shape parameter β > 0 + + Returns + ------- + statistic : float + The entropy-based test statistic + + References + ---------- + .. [1] Vasicek, O. (1976). A test for normality based on sample entropy. + Journal of the Royal Statistical Society: Series B (Methodological), + 38(1), 54-59. + """ + + @staticmethod + @override + def short_code(): + return "ENT" + + @staticmethod + @override + def code(): + short_code = EntropyBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the entropy-based test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + m : int, optional + Window size for entropy estimation. Default is n//4. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + n = len(rvs) + m = kwargs.get("m", n // 4) + + # Sort the data + rvs_sorted = np.sort(rvs) + + # Vasicek's entropy estimator + sample_entropy = 0 + for i in range(n): + left = max(0, i - m) + right = min(n - 1, i + m) + window_range = rvs_sorted[right] - rvs_sorted[left] + if window_range > 0: + sample_entropy += np.log(window_range * n / (2 * m)) + + sample_entropy /= n + + # Theoretical entropy + from scipy.special import betaln, psi + + theoretical_entropy = ( + betaln(self.alpha, self.beta) + - (self.alpha - 1) * psi(self.alpha) + - (self.beta - 1) * psi(self.beta) + + (self.alpha + self.beta - 2) * psi(self.alpha + self.beta) + ) + + # Test statistic + statistic = np.sqrt(n) * np.abs(sample_entropy - theoretical_entropy) + + return statistic + + +class ModeBetaGofStatistic(AbstractBetaGofStatistic): + """ + Mode-based test statistic for Beta distribution. + + For Beta(α, β) with α, β > 1, the mode is (α-1)/(α+β-2). + This test compares the sample mode (estimated) with the theoretical mode. + + Parameters + ---------- + alpha : float, default=2 + Shape parameter α > 1 + beta : float, default=2 + Shape parameter β > 1 + + Returns + ------- + statistic : float + The mode-based test statistic + + References + ---------- + .. [1] Chernoff, H. (1964). Estimation of the mode. + Annals of the Institute of Statistical Mathematics, 16(1), 31-41. + """ + + def __init__(self, alpha=2, beta=2): + if alpha <= 1: + raise ValueError("alpha must be greater than 1 for mode to be well-defined") + if beta <= 1: + raise ValueError("beta must be greater than 1 for mode to be well-defined") + super().__init__(alpha, beta) + + @staticmethod + @override + def short_code(): + return "MODE" + + @staticmethod + @override + def code(): + short_code = ModeBetaGofStatistic.short_code() + return f"{short_code}_{AbstractBetaGofStatistic.code()}" + + @override + def execute_statistic(self, rvs, **kwargs): + """ + Execute the mode-based test statistic. + + Parameters + ---------- + rvs : array_like + Array of sample data from Beta distribution. Values should be in [0, 1]. + bandwidth : float, optional + Bandwidth for kernel density estimation. If None, uses Scott's rule. + + Returns + ------- + statistic : float + The test statistic value + """ + rvs = self._validate_rvs(rvs) + + n = len(rvs) + + # Estimate sample mode using kernel density estimation + from scipy.stats import gaussian_kde + + kde = gaussian_kde(rvs) + x_grid = np.linspace(0.01, 0.99, 200) + density = kde(x_grid) + sample_mode = x_grid[np.argmax(density)] + + # Theoretical mode + theoretical_mode = (self.alpha - 1) / (self.alpha + self.beta - 2) + + # Test statistic + statistic = np.sqrt(n) * np.abs(sample_mode - theoretical_mode) + + return statistic diff --git a/tests/statistics/test_beta.py b/tests/statistics/test_beta.py new file mode 100644 index 0000000..39589f4 --- /dev/null +++ b/tests/statistics/test_beta.py @@ -0,0 +1,1099 @@ +import numpy as np +import pytest +import scipy.stats as scipy_stats + +from pysatl_criterion.statistics.beta import ( + AbstractBetaGofStatistic, + AndersonDarlingBetaGofStatistic, + Chi2PearsonBetaGofStatistic, + CrammerVonMisesBetaGofStatistic, + EntropyBetaGofStatistic, + KolmogorovSmirnovBetaGofStatistic, + KuiperBetaGofStatistic, + LillieforsTestBetaGofStatistic, + ModeBetaGofStatistic, + MomentBasedBetaGofStatistic, + RatioBetaGofStatistic, + SkewnessKurtosisBetaGofStatistic, + WatsonBetaGofStatistic, +) + + +def test_abstract_beta_criterion_code(): + """Test that the abstract Beta class returns correct code.""" + assert "BETA_GOODNESS_OF_FIT" == AbstractBetaGofStatistic.code() + + +def test_abstract_beta_criterion_parameters(): + """Test parameter validation for AbstractBetaGofStatistic.""" + # Test with concrete class since AbstractBetaGofStatistic is abstract + # Valid parameters + stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=3) + assert stat.alpha == 2 + assert stat.beta == 3 + + # Invalid alpha + with pytest.raises(ValueError, match="alpha must be positive"): + KolmogorovSmirnovBetaGofStatistic(alpha=-1, beta=2) + + with pytest.raises(ValueError, match="alpha must be positive"): + KolmogorovSmirnovBetaGofStatistic(alpha=0, beta=2) + + # Invalid beta + with pytest.raises(ValueError, match="beta must be positive"): + KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=-1) + + with pytest.raises(ValueError, match="beta must be positive"): + KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=0) + + +class TestKolmogorovSmirnovBetaGofStatistic: + """Tests for Kolmogorov-Smirnov test statistic.""" + + def test_code(self): + """Test that the KS statistic returns correct code.""" + assert "KS_BETA_GOODNESS_OF_FIT" == KolmogorovSmirnovBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.1877694, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.2081872, + ), + ( + [ + 0.6994383163, + 0.5174900183, + 0.5925767197, + 0.5611698399, + 0.9395027265, + 0.6820294051, + 0.4379703453, + 0.5303633637, + ], + 0.5, + 0.5, + 0.4604087, + ), + ( + [ + 0.7482953444, + 0.8762706351, + 0.4603376651, + 0.7471648301, + 0.8904978771, + 0.7206587486, + 0.8250535532, + 0.7494859221, + 0.8777684524, + 0.9167142296, + 0.9568773740, + ], + 5, + 2, + 0.3749592, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2160485, + ), + ], + ) + def test_ks_with_parametrized_data(self, data, alpha, beta, result): + """Test KS statistic with precomputed expected values.""" + stat = KolmogorovSmirnovBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + def test_ks_with_wrong_distribution(self): + """Test KS statistic with data from wrong distribution.""" + np.random.seed(42) + # Generate from Beta(5, 2) but test against Beta(2, 5) + data = scipy_stats.beta.rvs(5, 2, size=100) + + stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=5) + statistic_value = stat.execute_statistic(data) + + # Should detect the mismatch + assert statistic_value > 0.1 + + def test_ks_validation_errors(self): + """Test that KS statistic validates input data.""" + stat = KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=3) + + # Data outside [0, 1] + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): + stat.execute_statistic([0.5, 1.5, 0.3]) + + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): + stat.execute_statistic([-0.1, 0.5, 0.8]) + + +class TestAndersonDarlingBetaGofStatistic: + """Tests for Anderson-Darling test statistic.""" + + def test_code(self): + """Test that the AD statistic returns correct code.""" + assert "AD_BETA_GOODNESS_OF_FIT" == AndersonDarlingBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.3746031, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.7490749, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.3998264, + ), + ], + ) + def test_ad_with_parametrized_data(self, data, alpha, beta, result): + """Test AD statistic with precomputed expected values.""" + stat = AndersonDarlingBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + def test_ad_validation_errors(self): + """Test that AD statistic validates input data.""" + stat = AndersonDarlingBetaGofStatistic(alpha=2, beta=3) + + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): + stat.execute_statistic([0.5, 1.5, 0.3]) + + +class TestCrammerVonMisesBetaGofStatistic: + """Tests for Cramér-von Mises test statistic.""" + + def test_code(self): + """Test that the CVM statistic returns correct code.""" + assert "CVM_BETA_GOODNESS_OF_FIT" == CrammerVonMisesBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.0565441, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.1208704, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.0692098, + ), + ], + ) + def test_cvm_with_parametrized_data(self, data, alpha, beta, result): + """Test CVM statistic with precomputed expected values.""" + stat = CrammerVonMisesBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + def test_cvm_validation_errors(self): + """Test that CVM statistic validates input data.""" + stat = CrammerVonMisesBetaGofStatistic(alpha=2, beta=3) + + with pytest.raises(ValueError, match="Beta distribution values must be in the interval"): + stat.execute_statistic([0.5, 1.5, 0.3]) + + +class TestLillieforsTestBetaGofStatistic: + """Tests for Lilliefors test statistic.""" + + def test_code(self): + """Test that the Lilliefors statistic returns correct code.""" + assert "LILLIE_BETA_GOODNESS_OF_FIT" == LillieforsTestBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.1877694, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2160485, + ), + ], + ) + def test_lillie_with_parametrized_data(self, data, alpha, beta, result): + """Test Lilliefors statistic with precomputed expected values.""" + stat = LillieforsTestBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestChi2PearsonBetaGofStatistic: + """Tests for Pearson's Chi-squared test statistic.""" + + def test_code(self): + """Test that the Chi-squared statistic returns correct code.""" + assert "CHI2_PEARSON_BETA_GOODNESS_OF_FIT" == Chi2PearsonBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_chi2_with_generated_data(self, alpha, beta, seed, n): + """Test Chi-squared statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = Chi2PearsonBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 1.30301816, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 3.33333333, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 1.13560740, + ), + ], + ) + def test_chi2_with_parametrized_data(self, data, alpha, beta, result): + """Test Chi-squared statistic with precomputed expected values.""" + stat = Chi2PearsonBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestWatsonBetaGofStatistic: + """Tests for Watson test statistic.""" + + def test_code(self): + """Test that the Watson statistic returns correct code.""" + assert "W_BETA_GOODNESS_OF_FIT" == WatsonBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.0302649, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.1096339, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.0430198, + ), + ], + ) + def test_watson_with_parametrized_data(self, data, alpha, beta, result): + """Test Watson statistic with precomputed expected values.""" + stat = WatsonBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestKuiperBetaGofStatistic: + """Tests for Kuiper test statistic.""" + + def test_code(self): + """Test that the Kuiper statistic returns correct code.""" + assert "KUIPER_BETA_GOODNESS_OF_FIT" == KuiperBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.2567761, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.3450400, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2919412, + ), + ], + ) + def test_kuiper_with_parametrized_data(self, data, alpha, beta, result): + """Test Kuiper statistic with precomputed expected values.""" + stat = KuiperBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestMomentBasedBetaGofStatistic: + """Tests for Moment-based test statistic.""" + + def test_code(self): + """Test that the Moment-based statistic returns correct code.""" + assert "MB_BETA_GOODNESS_OF_FIT" == MomentBasedBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.2349020, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.3372358, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2891104, + ), + ], + ) + def test_moment_with_parametrized_data(self, data, alpha, beta, result): + """Test Moment-based statistic with precomputed expected values.""" + stat = MomentBasedBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestSkewnessKurtosisBetaGofStatistic: + """Tests for Skewness-Kurtosis test statistic.""" + + def test_code(self): + """Test that the Skewness-Kurtosis statistic returns correct code.""" + assert "SK_BETA_GOODNESS_OF_FIT" == SkewnessKurtosisBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.7277307, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.2648237, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.2303306, + ), + ], + ) + def test_sk_with_parametrized_data(self, data, alpha, beta, result): + """Test Skewness-Kurtosis statistic with precomputed expected values.""" + stat = SkewnessKurtosisBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestRatioBetaGofStatistic: + """Tests for Ratio test statistic.""" + + def test_code(self): + """Test that the Ratio statistic returns correct code.""" + assert "RT_BETA_GOODNESS_OF_FIT" == RatioBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_ratio_with_generated_data(self, alpha, beta, seed, n): + """Test Ratio statistic with data generated from Beta distribution.""" + np.random.seed(seed) + # Add small noise to avoid exact 0 or 1 + data = scipy_stats.beta.rvs(alpha, beta, size=n) + data = np.clip(data, 0.001, 0.999) + + stat = RatioBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.20054649, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 0.62062110, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.02561022, + ), + ], + ) + def test_ratio_with_parametrized_data(self, data, alpha, beta, result): + """Test Ratio statistic with precomputed expected values.""" + stat = RatioBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + def test_ratio_validation_errors(self): + """Test that Ratio statistic validates input data.""" + stat = RatioBetaGofStatistic(alpha=2, beta=3) + + # Data with exact 0 or 1 (for log calculation) + with pytest.raises( + ValueError, match="Beta distribution values must be in the open interval" + ): + stat.execute_statistic([0.5, 1.0, 0.3]) + + +class TestEntropyBetaGofStatistic: + """Tests for Entropy-based test statistic.""" + + def test_code(self): + """Test that the Entropy statistic returns correct code.""" + assert "ENT_BETA_GOODNESS_OF_FIT" == EntropyBetaGofStatistic.code() + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 100), + (3, 3, 123, 200), + ], + ) + def test_entropy_with_generated_data(self, alpha, beta, seed, n): + """Test Entropy statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = EntropyBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 1.46457551, + ), + ( + [ + 0.7087962001, + 0.2915205607, + 0.5508644650, + 0.8802250282, + 0.5098339187, + 0.6131471891, + 0.3176350910, + 0.1751690171, + 0.4660253747, + 0.5769342726, + 0.8436683688, + 0.4333839830, + ], + 1, + 1, + 1.64485770, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.86392069, + ), + ], + ) + def test_entropy_with_parametrized_data(self, data, alpha, beta, result): + """Test Entropy statistic with precomputed expected values.""" + stat = EntropyBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +class TestModeBetaGofStatistic: + """Tests for Mode-based test statistic.""" + + def test_code(self): + """Test that the Mode statistic returns correct code.""" + assert "MODE_BETA_GOODNESS_OF_FIT" == ModeBetaGofStatistic.code() + + def test_mode_parameters_validation(self): + """Test that Mode statistic validates alpha and beta > 1.""" + # Valid parameters + stat = ModeBetaGofStatistic(alpha=2, beta=2) + assert stat.alpha == 2 + assert stat.beta == 2 + + # Invalid alpha + with pytest.raises(ValueError, match="alpha must be greater than 1"): + ModeBetaGofStatistic(alpha=1, beta=2) + + # Invalid beta + with pytest.raises(ValueError, match="beta must be greater than 1"): + ModeBetaGofStatistic(alpha=2, beta=1) + + @pytest.mark.parametrize( + ("alpha", "beta", "seed", "n"), + [ + (2, 5, 42, 200), + (3, 3, 123, 300), + ], + ) + def test_mode_with_generated_data(self, alpha, beta, seed, n): + """Test Mode statistic with data generated from Beta distribution.""" + np.random.seed(seed) + data = scipy_stats.beta.rvs(alpha, beta, size=n) + + stat = ModeBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + + # Statistic should be non-negative + assert statistic_value >= 0 + assert np.isfinite(statistic_value) + + @pytest.mark.parametrize( + ("data", "alpha", "beta", "result"), + [ + ( + [ + 0.3536766572, + 0.2485580661, + 0.4159590873, + 0.1599675758, + 0.5502830781, + 0.1109452876, + 0.5098966418, + 0.1772703795, + 0.1982904719, + 0.3762367882, + ], + 2, + 5, + 0.03766130, + ), + ( + [ + 0.3366836981, + 0.5643169242, + 0.5625418098, + 0.4038462174, + 0.3593278603, + 0.7521677033, + 0.6460545301, + 0.7051711915, + 0.1494688017, + 0.8260848760, + ], + 3, + 3, + 0.39711215, + ), + ], + ) + def test_mode_with_parametrized_data(self, data, alpha, beta, result): + """Test Mode statistic with precomputed expected values.""" + stat = ModeBetaGofStatistic(alpha=alpha, beta=beta) + statistic_value = stat.execute_statistic(data) + assert result == pytest.approx(statistic_value, 0.00001) + + +# Integration tests +class TestBetaIntegration: + """Integration tests for Beta distribution statistics.""" + + def test_uniform_distribution(self): + """Test that Beta(1,1) equals uniform distribution.""" + np.random.seed(42) + data = np.random.uniform(0, 1, 100) + + # Should fit well to Beta(1, 1) + ks_stat = KolmogorovSmirnovBetaGofStatistic(alpha=1, beta=1) + ks_value = ks_stat.execute_statistic(data) + + # Should be relatively small + assert ks_value < 0.2 + + def test_all_statistics_run(self): + """Test that all statistics can be computed without errors.""" + np.random.seed(42) + data = scipy_stats.beta.rvs(2, 5, size=100) + # Clip for tests that need open interval + data_clipped = np.clip(data, 0.001, 0.999) + + statistics = [ + KolmogorovSmirnovBetaGofStatistic(alpha=2, beta=5), + AndersonDarlingBetaGofStatistic(alpha=2, beta=5), + CrammerVonMisesBetaGofStatistic(alpha=2, beta=5), + LillieforsTestBetaGofStatistic(alpha=2, beta=5), + Chi2PearsonBetaGofStatistic(alpha=2, beta=5), + WatsonBetaGofStatistic(alpha=2, beta=5), + KuiperBetaGofStatistic(alpha=2, beta=5), + MomentBasedBetaGofStatistic(alpha=2, beta=5), + SkewnessKurtosisBetaGofStatistic(alpha=2, beta=5), + EntropyBetaGofStatistic(alpha=2, beta=5), + ModeBetaGofStatistic(alpha=2, beta=5), + ] + + for stat in statistics: + try: + if isinstance(stat, RatioBetaGofStatistic): + value = stat.execute_statistic(data_clipped) + else: + value = stat.execute_statistic(data) + assert np.isfinite(value), f"Statistic {stat.code()} returned non-finite value" + except Exception as e: + pytest.fail(f"Statistic {stat.code()} raised an exception: {e}") + + # Also test Ratio with clipped data + ratio_stat = RatioBetaGofStatistic(alpha=2, beta=5) + ratio_value = ratio_stat.execute_statistic(data_clipped) + assert np.isfinite(ratio_value)