Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/pysatl_core/families/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
from .builtins import __all__ as _builtins_all
from .configuration import configure_families_register
from .distribution import ParametricFamilyDistribution
from .exponential_family import (
ExponentialConjugateHyperparameters,
ExponentialFamily,
ExponentialFamilyParametrization,
NaturalExponentialFamily,
SpacePredicate,
SpacePredicateArray,
)
from .parametric_family import ParametricFamily
from .parametrizations import (
Parametrization,
Expand All @@ -34,6 +42,12 @@
"configure_families_register",
# builtins
*_builtins_all,
"ExponentialFamily",
"ExponentialFamilyParametrization",
"ExponentialConjugateHyperparameters",
"SpacePredicate",
"SpacePredicateArray",
"NaturalExponentialFamily",
]

del _builtins_all
363 changes: 363 additions & 0 deletions src/pysatl_core/families/exponential_family.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change ExponentialFamily name to ContinuousExponentialClassFamily and other derivals

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that NEF is actually a class of families where sufficient statistic is provided by identity function; Functionality of ExponentialClass can be derived from existing functionality for multiple parametrizations

For example add optional argument to conjugate_prior which will highlight parametrization name.

Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, cast

import numpy as np
from scipy.differentiate import jacobian
from scipy.integrate import nquad
from scipy.linalg import det

from pysatl_core.distributions import (
SamplingStrategy,
)
from pysatl_core.families.parametric_family import ParametricFamily
from pysatl_core.families.parametrizations import Parametrization, parametrization
from pysatl_core.types import (
DistributionType,
GenericCharacteristicName,
ParametrizationName,
)

if TYPE_CHECKING:
from pysatl_core.distributions.support import Support

type ParametrizedFunction = Callable[[Parametrization, Any], Any]
type SupportArg = Callable[[Parametrization], Support | None] | None


PDF = "pdf"
CDF = "cdf"
PPF = "ppf"
CF = "char_func"
MEAN = "mean"
VAR = "var"
SKEW = "skewness"
KURT = "kurtosis"
Comment on lines +30 to +37
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have EnumStr with the names of the characteristic. We should use it to avoid declarations in each file



@dataclass
class ExponentialFamilyParametrization(Parametrization):
"""
Standard parametrization of Exponential Family.
"""

theta: list[float]


class ExponentialConjugateHyperparameters:
def __init__(self, alpha: Any, beta: int):
self.alpha = alpha
self.beta = beta

def __str__(self) -> str:
return f"alpha={self.alpha}, beta={self.beta}"
Comment on lines +49 to +55
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dataclass

@dataclass
class ExponentialConjugateHyperparameters:
    self.alpha : Any
    self.beta : int

    def __str__(self) -> str:
        return f"alpha={self.alpha}, beta={self.beta}"



def doesAccept(x: list[float] | float, support: list[tuple[float, float]]) -> bool:
if not hasattr(x, "__len__"):
x = [x]
Comment on lines +58 to +60
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not list in the annotations but Iterable or Sequence

Comment on lines +58 to +60
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Support can be list[ContinuousSupport] or just list[Interval1D]


x = cast(list[float], x)

def accept_1D(x: float, borders: tuple[float, float]) -> bool:
left, right = borders
if abs(x) == 0 and (abs(left) == 0 or abs(right) == 0):
return False
return left <= x <= right
Comment on lines +64 to +68
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be replaced with Interval1D.contains


return all(accept_1D(x_i, border) for x_i, border in zip(x, support, strict=False))


class SpacePredicate:
def __init__(self, predicate: Callable[[Any], bool]):
self._predicate = predicate

def accepts(self, x: Any) -> bool:
return self._predicate(x)

Comment on lines +73 to +79
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to types.py as type allias


class SpacePredicateArray(SpacePredicate):
def __init__(self, space: list[tuple[float, float]]):
SpacePredicate.__init__(self, lambda x: doesAccept(x, space))
self._space = space
Comment on lines +81 to +84
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to support.py; use interval_1d contains, or add doesAccept functionality to contains method



class NaturalExponentialFamily(ParametricFamily):
def __init__(
self,
*,
log_partition: Callable[[ExponentialFamilyParametrization], float],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since for density from Exponential class, one has to take product between vector of parameters and vector of statistics, we should

  1. either enforce both log_partition and sufficent_statistics had annotated arguments
  2. or leave checking correctness of argument order to user;

In the second case maybe it is better to use Callable[[npt.NDArray[np.float64]], np.float64]?

Currently, it seems that we have a kind of prototype to annotate natural parameters, but if want keep it, we should make possible to give arbitrary names to parameters.

sufficient_statistics: Callable[[Any], Any],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe enforce Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]]?

normalization_constant: Callable[[Any], Any],
support: SpacePredicate,
parameter_space: SpacePredicate,
sufficient_statistics_values: SpacePredicate,
name: str = "NaturalExponentialFamily",
distr_type: DistributionType | Callable[[Parametrization], DistributionType],
distr_parametrizations: list[ParametrizationName],
sampling_strategy: SamplingStrategy,
support_by_parametrization: SupportArg = None,
):
self._sufficient = sufficient_statistics
self._log_partition = log_partition
self._normalization = normalization_constant

self._support = support
self._parameter_space = parameter_space
self._sufficient_statistics_values = sufficient_statistics_values

distr_characteristics: dict[
GenericCharacteristicName,
dict[ParametrizationName, ParametrizedFunction] | ParametrizedFunction,
] = {
PDF: self.density,
MEAN: self._mean,
VAR: self._var,
}

ParametricFamily.__init__(
self,
name=name,
distr_type=distr_type,
distr_parametrizations=distr_parametrizations,
distr_characteristics=distr_characteristics,
sampling_strategy=sampling_strategy,
support_by_parametrization=support_by_parametrization,
)
parametrization(family=self, name="theta")(ExponentialFamilyParametrization)

def _transform_to_natural_parametrization(
self, theta_parametrization: ExponentialFamilyParametrization
) -> ExponentialFamilyParametrization:
return theta_parametrization

@property
def log_density(self) -> ParametrizedFunction:
def log_density_func(parametrization: Parametrization, x: Any) -> Any:
parametrization = cast(ExponentialFamilyParametrization, parametrization)
parametrization = self._transform_to_natural_parametrization(parametrization)
if not self._support.accepts(x):
return float("-inf")

theta = parametrization.theta
sufficient = self._sufficient(x)
dot = np.dot(theta, sufficient)
if hasattr(dot, "__len__"):
dot = dot[0]

result = float(
np.log(self._normalization(x)) + dot + self._log_partition(parametrization)
)
return result

return log_density_func

@property
def density(self) -> ParametrizedFunction:
return lambda parametrization, x: np.exp(self.log_density(parametrization, x))

@property
def conjugate_prior_family(self) -> NaturalExponentialFamily:
def conjugate_sufficient(
theta: float,
) -> list[Any]:
Comment on lines +163 to +165
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Accept multivariate parameter

if not self._parameter_space.accepts(theta):
return [float("-inf"), float("-inf")]

parametrization = ExponentialFamilyParametrization([theta])
return [
theta,
self._log_partition(parametrization),
]

def conjugate_log_partition(
parametrization: ExponentialFamilyParametrization,
) -> Any:
alpha = parametrization.theta[0]
beta = parametrization.theta[1]

Comment on lines +176 to +180
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Accept multivariate parameter

def pdf(theta: Any) -> Any:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use conjugate_sufficent instead

something like

np.exp(np.dot(conjugate_sufficient(theta), chi + [nu])).item()

if not hasattr(theta, "__len__"):
theta = [theta]
parametrization = ExponentialFamilyParametrization(theta=theta)
return np.exp(np.dot(theta, alpha) + beta * self._log_partition(parametrization))[0]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead x[0] use x.item() for single element array


all_value = nquad(
lambda x: pdf(x) if self._parameter_space.accepts(x) else 0, # type: ignore[arg-type]
[(float("-inf"), float("+inf"))],
)[0]
return -np.log(all_value)

def conjugate_sufficient_accepts(
parametrization: ExponentialFamilyParametrization,
) -> bool:
theta = parametrization.theta
xi = theta[:-1]
nu = theta[-1]

return self._sufficient_statistics_values.accepts(xi) and SpacePredicateArray(
[(0, float("+inf"))]
).accepts(nu)

return NaturalExponentialFamily(
log_partition=conjugate_log_partition,
sufficient_statistics=conjugate_sufficient,
normalization_constant=lambda _: 1,
support=self._parameter_space,
sufficient_statistics_values=self._parameter_space, # TODO: write convex hull for this
parameter_space=SpacePredicate(conjugate_sufficient_accepts),
name=self.name,
sampling_strategy=self.sampling_strategy,
distr_type=self._distr_type,
distr_parametrizations=self.parametrization_names,
support_by_parametrization=self.support_resolver,
)

def transform(
self,
transform_function: Callable[[Any], Any],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add docstring
transform function is mapping from user parametrization to canonical

) -> NaturalExponentialFamily:
def calculate_jacobian(x: Any) -> Any:
if type(x) is not list:
x = np.array([x])

return np.abs(det(jacobian(transform_function, x).df))

def new_support(x: Any) -> bool:
return self._support.accepts(transform_function(x))

def new_sufficient(x: Any) -> Any:
return self._sufficient(transform_function(x))

def new_normalization(x: Any) -> Any:
return self._normalization(x) * calculate_jacobian(x)

return NaturalExponentialFamily(
log_partition=self._log_partition,
sufficient_statistics=new_sufficient,
normalization_constant=new_normalization,
support=SpacePredicate(new_support),
parameter_space=self._parameter_space,
sufficient_statistics_values=self._sufficient_statistics_values,
name=f"Transformed{self._name}",
distr_type=self._distr_type,
distr_parametrizations=self.parametrization_names,
sampling_strategy=self.sampling_strategy,
support_by_parametrization=self.support_resolver,
)

@property
def _mean(self) -> ParametrizedFunction:
def mean_func(parametrization: Parametrization, x: Any) -> Any:
parametrization = cast(ExponentialFamilyParametrization, parametrization)
dimension_size = 1
if hasattr(x, "__len__"):
dimension_size = len(x)
return nquad(
lambda x: ( # type: ignore[arg-type]
np.dot(x, self.density(parametrization, x)) if self._support.accepts(x) else 0
),
[(float("-inf"), float("inf"))] * dimension_size,
)[0]

return mean_func

@property
def _second_moment(self) -> ParametrizedFunction:
def func(parametrization: Parametrization, x: Any) -> Any:
parametrization = cast(ExponentialFamilyParametrization, parametrization)
dimension_size = 1
if hasattr(x, "__len__"):
dimension_size = len(x)
return nquad(
lambda x: ( # type: ignore[arg-type]
x**2 * self.density(parametrization, x) if self._support.accepts(x) else 0
),
[(float("-inf"), float("inf"))] * dimension_size,
)[0]

return func

@property
def _var(self) -> ParametrizedFunction:
def func(parametrization: Parametrization, x: Any) -> Any:
parametrization = cast(ExponentialFamilyParametrization, parametrization)
return self._second_moment(parametrization, x) - self._mean(parametrization, x) ** 2

return func

def posterior_hyperparameters(
self, prior_hyper: ExponentialConjugateHyperparameters, sample: list[Any]
) -> ExponentialConjugateHyperparameters:
alpha = prior_hyper.alpha
beta = prior_hyper.beta
Comment on lines +294 to +295
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

naming:

alpha -> effective_suff_stat_value
beta -> effective_sample_size

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change globally


alpha_post = None
beta_post = None
if hasattr(sample, "__iter__") and not isinstance(sample, str):
alpha_post = np.sum([self._sufficient(x) for x in sample], axis=0)
beta_post = len(sample)
else:
alpha_post = self._sufficient(sample)
beta_post = 1

return ExponentialConjugateHyperparameters(alpha=alpha + alpha_post, beta=beta + beta_post)


class ExponentialFamily(NaturalExponentialFamily):
def __init__(
self,
*,
log_partition: Callable[[ExponentialFamilyParametrization], float],
sufficient_statistics: Callable[[Any], Any],
normalization_constant: Callable[[Any], Any],
parameter_from_natural_parameter: Callable[[Any], Any],
natural_parameter: Callable[
[ExponentialFamilyParametrization], ExponentialFamilyParametrization
],
support: SpacePredicate,
parameter_space: SpacePredicate,
sufficient_statistics_values: SpacePredicate,
distr_type: DistributionType | Callable[[Parametrization], DistributionType],
distr_parametrizations: list[ParametrizationName],
sampling_strategy: SamplingStrategy,
name: str = "ExponentialFamily",
support_by_parametrization: SupportArg = None,
):
def natural_log_partition(
eta_parametrizaion: ExponentialFamilyParametrization,
) -> Any:
eta = eta_parametrizaion.theta
theta = parameter_from_natural_parameter(eta)
return log_partition(ExponentialFamilyParametrization(theta=[theta]))

natural_sufficient_statistics_values = SpacePredicate(
lambda eta: sufficient_statistics_values.accepts(parameter_from_natural_parameter(eta))
)

self._natural_parameter = natural_parameter
natural_parameter_space = SpacePredicate(
lambda eta: parameter_space.accepts(parameter_from_natural_parameter(eta)),
)

NaturalExponentialFamily.__init__(
self,
log_partition=natural_log_partition,
sufficient_statistics=sufficient_statistics,
normalization_constant=normalization_constant,
support=support,
parameter_space=natural_parameter_space,
sufficient_statistics_values=natural_sufficient_statistics_values,
name=name,
distr_parametrizations=distr_parametrizations,
distr_type=distr_type,
sampling_strategy=sampling_strategy,
support_by_parametrization=support_by_parametrization,
)

def _transform_to_natural_parametrization(
self, theta_parametrization: ExponentialFamilyParametrization
) -> ExponentialFamilyParametrization:
return self._natural_parameter(theta_parametrization)
Loading