Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions howso/client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,7 @@ def react( # noqa: C901
feature_bounds_map: t.Optional[Mapping] = None,
feature_pre_process_code_map: t.Optional[Mapping] = None,
feature_post_process_code_map: t.Optional[Mapping] = None,
filter_fanout_values: bool = False,
generate_new_cases: GenerateNewCases = "no",
goal_features_map: t.Optional[Mapping] = None,
initial_batch_size: t.Optional[int] = None,
Expand Down Expand Up @@ -2202,7 +2203,10 @@ def react( # noqa: C901
resulting value will be used as part of the context for following
action features. The custom code will have access to all context
feature values and previously generated action feature values.

filter_fanout_values : bool, default False
When true, predictions of features with fanned out values will be
made while holding out other cases that had the same values
duplicated.
generate_new_cases : {"always", "attempt", "no"}, default "no"
(Optional) Whether to generate new cases.

Expand Down Expand Up @@ -2411,6 +2415,7 @@ def react( # noqa: C901
"derived_action_features": derived_action_features,
"feature_pre_process_code_map": feature_pre_process_code_map,
"feature_post_process_code_map": feature_post_process_code_map,
"filter_fanout_values": filter_fanout_values,
"goal_features_map": goal_features_map,
"post_process_features": post_process_features,
"post_process_values": post_process_values,
Expand Down Expand Up @@ -2457,6 +2462,7 @@ def react( # noqa: C901
"derived_action_features": derived_action_features,
"feature_pre_process_code_map": feature_pre_process_code_map,
"feature_post_process_code_map": feature_post_process_code_map,
"filter_fanout_values": filter_fanout_values,
"post_process_features": post_process_features,
"post_process_values": post_process_values,
"use_differential_privacy": use_differential_privacy,
Expand Down Expand Up @@ -2764,6 +2770,7 @@ def react_series( # noqa: C901
exclude_novel_nominals_from_uniqueness_check: bool = False,
feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
feature_post_process_code_map: t.Optional[Mapping] = None,
filter_fanout_values: bool = False,
final_time_steps: t.Optional[list[t.Any]] = None,
generate_new_cases: GenerateNewCases = "no",
goal_features_map: t.Optional[Mapping] = None,
Expand Down Expand Up @@ -2897,6 +2904,10 @@ def react_series( # noqa: C901
feature values and previously generated action feature values of
the timestep being generated, as well as the feature values of all
previously generated timesteps.
filter_fanout_values : bool, default False
When true, predictions of features with fanned out values will be
made while holding out other cases that had the same values
duplicated.
series_context_features : iterable of str, optional
List of context features corresponding to ``series_context_values``.
series_context_values : list of list of list of object or list of DataFrame, optional
Expand Down Expand Up @@ -3130,6 +3141,7 @@ def react_series( # noqa: C901
"constraints": constraints,
"continue_series": continue_series,
"feature_post_process_code_map": feature_post_process_code_map,
"filter_fanout_values": filter_fanout_values,
"final_time_steps": final_time_steps,
"init_time_steps": init_time_steps,
"series_stop_maps": series_stop_maps,
Expand Down Expand Up @@ -3183,6 +3195,7 @@ def react_series( # noqa: C901
"constraints": constraints,
"continue_series": continue_series,
"feature_post_process_code_map": feature_post_process_code_map,
"filter_fanout_values": filter_fanout_values,
"final_time_steps": final_time_steps,
"init_time_steps": init_time_steps,
"series_stop_maps": series_stop_maps,
Expand Down Expand Up @@ -3689,6 +3702,7 @@ def react_aggregate( # noqa: C901
convergence_threshold: t.Optional[float] = None,
features_to_derive: t.Optional[Collection[str]] = None,
feature_influences_action_feature: t.Optional[str] = None,
filter_fanout_values: bool = False,
forecast_window_length: t.Optional[float] = None,
goal_dependent_features: t.Optional[Collection[str]] = None,
goal_features_map: t.Optional[Mapping] = None,
Expand All @@ -3711,7 +3725,7 @@ def react_aggregate( # noqa: C901
value_robust_contributions_features: t.Optional[Collection[str]] = None,
value_robust_contributions_num_buckets: int = 30,
value_robust_contributions_min_samples: int = 15,
value_robust_contributions_min_cases: int = 15,
value_robust_contributions_min_cases: int | dict[str, int] = 15,
weight_feature: t.Optional[str] = None,
) -> dict[str, dict[str, t.Any]]:
"""
Expand Down Expand Up @@ -3914,6 +3928,10 @@ def react_aggregate( # noqa: C901
feature_influences_action_feature : str, optional
When computing feature influences such as accuracy and prediction contributions, use this feature as
the action feature. If feature influences ``details`` are selected, this feature must be provided.
filter_fanout_values : bool, default False
When true, predictions of features with fanned out values will be
made while holding out other cases that had the same values
duplicated.
forecast_window_length : float, optional
A value specifying a length of time over which to measure the accuracy of forecasts. When
specified, returned prediction statistics and full residuals will be measuring the accuracy
Expand Down Expand Up @@ -4044,11 +4062,12 @@ def react_aggregate( # noqa: C901
The minimum number of samples required for a combination of feature values for its
aggregated measure to be returned when computing the "value_robust_accuracy_contributions",
"value_robust_prediction_contributions" or "value_robust_surprisal_asymmetry" details.
value_robust_contributions_min_cases: int, default 15
value_robust_contributions_min_cases: int or map of str to int, default 15
The minimum number of unique cases for a given nominal class or continuous bucket to be
used as a possible feature value when collecting all combinations of feature values in
the data to report metrics over. If unspecified, there is no filtering based on number
of unique cases.
the data to report metrics over. May be specified as a single value or a mapping of feature names to
values defining individual thresholds for each feature. If defined as a mapping, then any features without
defined thresholds will use a default value of 15.
weight_feature : str, optional
The name of feature whose values to use as case weights.
When left unspecified uses the internally managed case weight.
Expand Down Expand Up @@ -4107,6 +4126,7 @@ def react_aggregate( # noqa: C901
"convergence_threshold": convergence_threshold,
"features_to_derive": features_to_derive,
"feature_influences_action_feature": feature_influences_action_feature,
"filter_fanout_values": filter_fanout_values,
"forecast_window_length": forecast_window_length,
"goal_dependent_features": goal_dependent_features,
"goal_features_map": goal_features_map,
Expand Down
9 changes: 8 additions & 1 deletion howso/client/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ class FeatureTimeSeries(TypedDict, total=False):
the default.
"""


class FeatureAttributes(TypedDict):
"""
Attributes for a single feature.
Expand Down Expand Up @@ -344,6 +343,14 @@ class FeatureAttributes(TypedDict):
on values based on other multi-type value features.
"""

fanout_on: NotRequired[list[str]]
"""
Features whose values can be used to select other cases that have the same
duplicated value for this fan-out feature.

Should be used when this is a fan-out feature.
"""

derived_feature_code: NotRequired[str]
"""
Code defining how to derive this feature's value.
Expand Down
28 changes: 23 additions & 5 deletions howso/engine/trainee.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,7 @@ def react(
feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
feature_pre_process_code_map: t.Optional[Mapping] = None,
feature_post_process_code_map: t.Optional[Mapping] = None,
filter_fanout_values: bool = False,
generate_new_cases: GenerateNewCases = "no",
goal_features_map: t.Optional[Mapping] = None,
initial_batch_size: t.Optional[int] = None,
Expand Down Expand Up @@ -1723,7 +1724,10 @@ def react(
resulting value will be used as part of the context for following
action features. The custom code will have access to all context
feature values and previously generated action feature values.

filter_fanout_values : bool, default False
When true, predictions of features with fanned out values will be
made while holding out other cases that had the same values
duplicated.
generate_new_cases : {"always", "attempt", "no"}, default "no"
This parameter takes in a string that may be one of the following:

Expand Down Expand Up @@ -1868,6 +1872,7 @@ def react(
feature_bounds_map=feature_bounds_map,
feature_pre_process_code_map=feature_pre_process_code_map,
feature_post_process_code_map=feature_post_process_code_map,
filter_fanout_values=filter_fanout_values,
generate_new_cases=generate_new_cases,
goal_features_map=goal_features_map,
initial_batch_size=initial_batch_size,
Expand Down Expand Up @@ -1906,6 +1911,7 @@ def react_series(
exclude_novel_nominals_from_uniqueness_check: bool = False,
feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
feature_post_process_code_map: t.Optional[Mapping] = None,
filter_fanout_values: bool = False,
final_time_steps: t.Optional[list[t.Any]] = None,
generate_new_cases: GenerateNewCases = "no",
goal_features_map: t.Optional[Mapping] = None,
Expand Down Expand Up @@ -2017,6 +2023,10 @@ def react_series(
feature values and previously generated action feature values of
the time-step being generated, as well as the feature values of all
previously generated time-steps.
filter_fanout_values : bool, default False
When true, predictions of features with fanned out values will be
made while holding out other cases that had the same values
duplicated.
final_time_steps: list of object, optional
The time steps at which to end synthesis. Time-series only.
Time-series only. Must provide either one for all series, or
Expand Down Expand Up @@ -2157,6 +2167,7 @@ def react_series(
exclude_novel_nominals_from_uniqueness_check=exclude_novel_nominals_from_uniqueness_check,
feature_bounds_map=feature_bounds_map,
feature_post_process_code_map=feature_post_process_code_map,
filter_fanout_values=filter_fanout_values,
final_time_steps=final_time_steps,
generate_new_cases=generate_new_cases,
goal_features_map=goal_features_map,
Expand Down Expand Up @@ -3582,6 +3593,7 @@ def react_aggregate(
convergence_threshold: t.Optional[float] = None,
features_to_derive: t.Optional[Collection[str]] = None,
feature_influences_action_feature: t.Optional[str] = None,
filter_fanout_values: bool = False,
forecast_window_length: t.Optional[float] = None,
goal_dependent_features: t.Optional[Collection[str]] = None,
goal_features_map: t.Optional[Mapping] = None,
Expand All @@ -3604,7 +3616,7 @@ def react_aggregate(
value_robust_contributions_features: t.Optional[Collection[str]] = None,
value_robust_contributions_num_buckets: int = 30,
value_robust_contributions_min_samples: int = 15,
value_robust_contributions_min_cases: int = 15,
value_robust_contributions_min_cases: int | dict[str, int] = 15,
weight_feature: t.Optional[str] = None,
) -> AggregateReaction:
"""
Expand Down Expand Up @@ -3811,6 +3823,10 @@ def react_aggregate(
not providing this feature will return a matrix where each feature is used as an action feature. However,
providing this feature if 'feature_robust_accuracy_contributions' is selected is still accepted, and will
return just the feature influences for the selected feature.
filter_fanout_values : bool, default False
When true, predictions of features with fanned out values will be
made while holding out other cases that had the same values
duplicated.
forecast_window_length : float, optional
A value specifying a length of time over which to measure the accuracy of forecasts. When
specified, returned prediction statistics and full residuals will be measuring the accuracy
Expand Down Expand Up @@ -3941,11 +3957,12 @@ def react_aggregate(
The minimum number of samples required for a combination of feature values for its
aggregated measure to be returned when computing the "value_robust_accuracy_contributions",
"value_robust_prediction_contributions" or "value_robust_surprisal_asymmetry" details.
value_robust_contributions_min_cases: int, default 15
value_robust_contributions_min_cases: int or map of str to int, default 15
The minimum number of unique cases for a given nominal class or continuous bucket to be
used as a possible feature value when collecting all combinations of feature values in
the data to report metrics over. If unspecified, there is no filtering based on number
of unique cases.
the data to report metrics over. May be specified as a single value or a mapping of feature names to
values defining individual thresholds for each feature. If defined as a mapping, then any features without
defined thresholds will use a default value of 15.
weight_feature : str, optional
The name of feature whose values to use as case weights.
When left unspecified uses the internally managed case weight.
Expand All @@ -3967,6 +3984,7 @@ def react_aggregate(
convergence_threshold=convergence_threshold,
features_to_derive=features_to_derive,
feature_influences_action_feature=feature_influences_action_feature,
filter_fanout_values=filter_fanout_values,
forecast_window_length=forecast_window_length,
goal_dependent_features=goal_dependent_features,
goal_features_map=goal_features_map,
Expand Down
12 changes: 11 additions & 1 deletion howso/utilities/feature_attributes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,7 @@ def _process(self, # noqa: C901
datetime_feature_formats: t.Optional[dict] = None,
default_time_zone: t.Optional[str] = None,
dependent_features: t.Optional[dict[str, list[str]]] = None,
fanout_feature_map: t.Optional[dict[tuple[str] | str, list[str]]] = None,
id_feature_name: t.Optional[str | Iterable[str]] = None,
include_extended_nominal_probabilities: t.Optional[bool] = False,
include_sample: bool = False,
Expand Down Expand Up @@ -1108,14 +1109,23 @@ def _process(self, # noqa: C901
# Validate datetimes after any user-defined features have been re-implemented
self._validate_date_times()

# Configure the fanout feature attributes according to the input if given.
if fanout_feature_map:
for key_features, fanout_features in fanout_feature_map.items():
if isinstance(key_features, str):
key_features = [key_features]
for f in fanout_features:
if f in self.attributes:
self.attributes[f]['fanout_on'] = list(key_features)

# Re-order the keys like the original dataframe
ordered_attributes = {}
for fname in self.data.columns:
# Check to see if the key is a sqlalchemy Column
if hasattr(fname, 'name'):
fname = fname.name
if fname not in self.attributes.keys():
warnings.warn(f'Feature {fname} exists in provided data but was not computed in feature attributes')
warnings.warn(f'Feature {fname} exists in provided data but was not computed in feature attributes.')
continue
ordered_attributes[fname] = self.attributes[fname]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def infer_feature_attributes(data: pd.DataFrame | SQLRelationalDatastoreProtocol
to 2 will synthesize the 3rd order derivative value, and then use
that synthed value to derive the 2nd and 1st order.

fanout_feature_map : dict of str or tuple of str to list of str, optional
(Optional) Dict mapping "key" feature names or tuples of "key" feature names to list of "fanout" feature names.
Fanout features are features with values fanned out across multiple cases. Key features are features
whose values can be used to select groups of cases that have the same duplicated fanout values.

id_feature_name : str or list of str, default None
(Optional) The name(s) of the ID feature(s).

Expand Down
7 changes: 7 additions & 0 deletions howso/utilities/feature_attributes/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ def _process( # noqa: C901
delta_boundaries: t.Optional[dict] = None,
dependent_features: t.Optional[dict] = None,
derived_orders: t.Optional[dict] = None,
fanout_feature_map: t.Optional[dict[str | tuple[str], list[str]]] = None,
id_feature_name: t.Optional[str | Iterable[str]] = None,
include_extended_nominal_probabilities: t.Optional[bool] = False,
include_sample: bool = False,
Expand Down Expand Up @@ -448,6 +449,11 @@ def _process( # noqa: C901
to 2 will synthesize the 3rd order derivative value, and then use
that synthed value to derive the 2nd and 1st order.

fanout_feature_map : dict of str or tuple of str to list of str, optional
(Optional) Dict mapping "key" feature names or tuples of "key" feature names to list of "fanout" feature names.
Fanout features are features with values fanned out across multiple cases. Key features are features
whose values can be used to select groups of cases that have the same duplicated fanout values.

id_feature_name : str or list of str default None
(Optional) The name(s) of the ID feature(s).

Expand Down Expand Up @@ -633,6 +639,7 @@ def _process( # noqa: C901
datetime_feature_formats=datetime_feature_formats,
default_time_zone=default_time_zone,
dependent_features=dependent_features,
fanout_feature_map=fanout_feature_map,
id_feature_name=id_feature_name,
include_extended_nominal_probabilities=include_extended_nominal_probabilities,
include_sample=include_sample,
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"dependencies": {
"howso-engine": "110.3.0"
"howso-engine": "110.5.0"
}
}
Loading