Skip to content

Commit f312459

Browse files
authored
25031: Adds support for fanout features to infer_feature_attributes and the new "filter_fanout_values" flags on react endpoints, MINOR (#593)
1 parent b255051 commit f312459

7 files changed

Lines changed: 80 additions & 13 deletions

File tree

howso/client/base.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,6 +1681,7 @@ def react( # noqa: C901
16811681
feature_bounds_map: t.Optional[Mapping] = None,
16821682
feature_pre_process_code_map: t.Optional[Mapping] = None,
16831683
feature_post_process_code_map: t.Optional[Mapping] = None,
1684+
filter_fanout_values: bool = False,
16841685
generate_new_cases: GenerateNewCases = "no",
16851686
goal_features_map: t.Optional[Mapping] = None,
16861687
initial_batch_size: t.Optional[int] = None,
@@ -2202,7 +2203,10 @@ def react( # noqa: C901
22022203
resulting value will be used as part of the context for following
22032204
action features. The custom code will have access to all context
22042205
feature values and previously generated action feature values.
2205-
2206+
filter_fanout_values : bool, default False
2207+
When true, predictions of features with fanned out values will be
2208+
made while holding out other cases that had the same values
2209+
duplicated.
22062210
generate_new_cases : {"always", "attempt", "no"}, default "no"
22072211
(Optional) Whether to generate new cases.
22082212
@@ -2411,6 +2415,7 @@ def react( # noqa: C901
24112415
"derived_action_features": derived_action_features,
24122416
"feature_pre_process_code_map": feature_pre_process_code_map,
24132417
"feature_post_process_code_map": feature_post_process_code_map,
2418+
"filter_fanout_values": filter_fanout_values,
24142419
"goal_features_map": goal_features_map,
24152420
"post_process_features": post_process_features,
24162421
"post_process_values": post_process_values,
@@ -2457,6 +2462,7 @@ def react( # noqa: C901
24572462
"derived_action_features": derived_action_features,
24582463
"feature_pre_process_code_map": feature_pre_process_code_map,
24592464
"feature_post_process_code_map": feature_post_process_code_map,
2465+
"filter_fanout_values": filter_fanout_values,
24602466
"post_process_features": post_process_features,
24612467
"post_process_values": post_process_values,
24622468
"use_differential_privacy": use_differential_privacy,
@@ -2764,6 +2770,7 @@ def react_series( # noqa: C901
27642770
exclude_novel_nominals_from_uniqueness_check: bool = False,
27652771
feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
27662772
feature_post_process_code_map: t.Optional[Mapping] = None,
2773+
filter_fanout_values: bool = False,
27672774
final_time_steps: t.Optional[list[t.Any]] = None,
27682775
generate_new_cases: GenerateNewCases = "no",
27692776
goal_features_map: t.Optional[Mapping] = None,
@@ -2897,6 +2904,10 @@ def react_series( # noqa: C901
28972904
feature values and previously generated action feature values of
28982905
the timestep being generated, as well as the feature values of all
28992906
previously generated timesteps.
2907+
filter_fanout_values : bool, default False
2908+
When true, predictions of features with fanned out values will be
2909+
made while holding out other cases that had the same values
2910+
duplicated.
29002911
series_context_features : iterable of str, optional
29012912
List of context features corresponding to ``series_context_values``.
29022913
series_context_values : list of list of list of object or list of DataFrame, optional
@@ -3130,6 +3141,7 @@ def react_series( # noqa: C901
31303141
"constraints": constraints,
31313142
"continue_series": continue_series,
31323143
"feature_post_process_code_map": feature_post_process_code_map,
3144+
"filter_fanout_values": filter_fanout_values,
31333145
"final_time_steps": final_time_steps,
31343146
"init_time_steps": init_time_steps,
31353147
"series_stop_maps": series_stop_maps,
@@ -3183,6 +3195,7 @@ def react_series( # noqa: C901
31833195
"constraints": constraints,
31843196
"continue_series": continue_series,
31853197
"feature_post_process_code_map": feature_post_process_code_map,
3198+
"filter_fanout_values": filter_fanout_values,
31863199
"final_time_steps": final_time_steps,
31873200
"init_time_steps": init_time_steps,
31883201
"series_stop_maps": series_stop_maps,
@@ -3689,6 +3702,7 @@ def react_aggregate( # noqa: C901
36893702
convergence_threshold: t.Optional[float] = None,
36903703
features_to_derive: t.Optional[Collection[str]] = None,
36913704
feature_influences_action_feature: t.Optional[str] = None,
3705+
filter_fanout_values: bool = False,
36923706
forecast_window_length: t.Optional[float] = None,
36933707
goal_dependent_features: t.Optional[Collection[str]] = None,
36943708
goal_features_map: t.Optional[Mapping] = None,
@@ -3711,7 +3725,7 @@ def react_aggregate( # noqa: C901
37113725
value_robust_contributions_features: t.Optional[Collection[str]] = None,
37123726
value_robust_contributions_num_buckets: int = 30,
37133727
value_robust_contributions_min_samples: int = 15,
3714-
value_robust_contributions_min_cases: int = 15,
3728+
value_robust_contributions_min_cases: int | dict[str, int] = 15,
37153729
weight_feature: t.Optional[str] = None,
37163730
) -> dict[str, dict[str, t.Any]]:
37173731
"""
@@ -3914,6 +3928,10 @@ def react_aggregate( # noqa: C901
39143928
feature_influences_action_feature : str, optional
39153929
When computing feature influences such as accuracy and prediction contributions, use this feature as
39163930
the action feature. If feature influences ``details`` are selected, this feature must be provided.
3931+
filter_fanout_values : bool, default False
3932+
When true, predictions of features with fanned out values will be
3933+
made while holding out other cases that had the same values
3934+
duplicated.
39173935
forecast_window_length : float, optional
39183936
A value specifying a length of time over which to measure the accuracy of forecasts. When
39193937
specified, returned prediction statistics and full residuals will be measuring the accuracy
@@ -4044,11 +4062,12 @@ def react_aggregate( # noqa: C901
40444062
The minimum number of samples required for a combination of feature values for its
40454063
aggregated measure to be returned when computing the "value_robust_accuracy_contributions",
40464064
"value_robust_prediction_contributions" or "value_robust_surprisal_asymmetry" details.
4047-
value_robust_contributions_min_cases: int, default 15
4065+
value_robust_contributions_min_cases: int or map of str to int, default 15
40484066
The minimum number of unique cases for a given nominal class or continuous bucket to be
40494067
used as a possible feature value when collecting all combinations of feature values in
4050-
the data to report metrics over. If unspecified, there is no filtering based on number
4051-
of unique cases.
4068+
the data to report metrics over. May be specified as a single value or a mapping of feature names to
4069+
values defining individual thresholds for each feature. If defined as a mapping, then any features without
4070+
defined thresholds will use a default value of 15.
40524071
weight_feature : str, optional
40534072
The name of feature whose values to use as case weights.
40544073
When left unspecified uses the internally managed case weight.
@@ -4107,6 +4126,7 @@ def react_aggregate( # noqa: C901
41074126
"convergence_threshold": convergence_threshold,
41084127
"features_to_derive": features_to_derive,
41094128
"feature_influences_action_feature": feature_influences_action_feature,
4129+
"filter_fanout_values": filter_fanout_values,
41104130
"forecast_window_length": forecast_window_length,
41114131
"goal_dependent_features": goal_dependent_features,
41124132
"goal_features_map": goal_features_map,

howso/client/typing.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,6 @@ class FeatureTimeSeries(TypedDict, total=False):
242242
the default.
243243
"""
244244

245-
246245
class FeatureAttributes(TypedDict):
247246
"""
248247
Attributes for a single feature.
@@ -344,6 +343,14 @@ class FeatureAttributes(TypedDict):
344343
on values based on other multi-type value features.
345344
"""
346345

346+
fanout_on: NotRequired[list[str]]
347+
"""
348+
Features whose values can be used to select other cases that have the same
349+
duplicated value for this fan-out feature.
350+
351+
Should be used when this is a fan-out feature.
352+
"""
353+
347354
derived_feature_code: NotRequired[str]
348355
"""
349356
Code defining how to derive this feature's value.

howso/engine/trainee.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,7 @@ def react(
12551255
feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
12561256
feature_pre_process_code_map: t.Optional[Mapping] = None,
12571257
feature_post_process_code_map: t.Optional[Mapping] = None,
1258+
filter_fanout_values: bool = False,
12581259
generate_new_cases: GenerateNewCases = "no",
12591260
goal_features_map: t.Optional[Mapping] = None,
12601261
initial_batch_size: t.Optional[int] = None,
@@ -1723,7 +1724,10 @@ def react(
17231724
resulting value will be used as part of the context for following
17241725
action features. The custom code will have access to all context
17251726
feature values and previously generated action feature values.
1726-
1727+
filter_fanout_values : bool, default False
1728+
When true, predictions of features with fanned out values will be
1729+
made while holding out other cases that had the same values
1730+
duplicated.
17271731
generate_new_cases : {"always", "attempt", "no"}, default "no"
17281732
This parameter takes in a string that may be one of the following:
17291733
@@ -1868,6 +1872,7 @@ def react(
18681872
feature_bounds_map=feature_bounds_map,
18691873
feature_pre_process_code_map=feature_pre_process_code_map,
18701874
feature_post_process_code_map=feature_post_process_code_map,
1875+
filter_fanout_values=filter_fanout_values,
18711876
generate_new_cases=generate_new_cases,
18721877
goal_features_map=goal_features_map,
18731878
initial_batch_size=initial_batch_size,
@@ -1906,6 +1911,7 @@ def react_series(
19061911
exclude_novel_nominals_from_uniqueness_check: bool = False,
19071912
feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
19081913
feature_post_process_code_map: t.Optional[Mapping] = None,
1914+
filter_fanout_values: bool = False,
19091915
final_time_steps: t.Optional[list[t.Any]] = None,
19101916
generate_new_cases: GenerateNewCases = "no",
19111917
goal_features_map: t.Optional[Mapping] = None,
@@ -2017,6 +2023,10 @@ def react_series(
20172023
feature values and previously generated action feature values of
20182024
the time-step being generated, as well as the feature values of all
20192025
previously generated time-steps.
2026+
filter_fanout_values : bool, default False
2027+
When true, predictions of features with fanned out values will be
2028+
made while holding out other cases that had the same values
2029+
duplicated.
20202030
final_time_steps: list of object, optional
20212031
The time steps at which to end synthesis. Time-series only.
20222032
Must provide either one for all series, or
@@ -2157,6 +2167,7 @@ def react_series(
21572167
exclude_novel_nominals_from_uniqueness_check=exclude_novel_nominals_from_uniqueness_check,
21582168
feature_bounds_map=feature_bounds_map,
21592169
feature_post_process_code_map=feature_post_process_code_map,
2170+
filter_fanout_values=filter_fanout_values,
21602171
final_time_steps=final_time_steps,
21612172
generate_new_cases=generate_new_cases,
21622173
goal_features_map=goal_features_map,
@@ -3582,6 +3593,7 @@ def react_aggregate(
35823593
convergence_threshold: t.Optional[float] = None,
35833594
features_to_derive: t.Optional[Collection[str]] = None,
35843595
feature_influences_action_feature: t.Optional[str] = None,
3596+
filter_fanout_values: bool = False,
35853597
forecast_window_length: t.Optional[float] = None,
35863598
goal_dependent_features: t.Optional[Collection[str]] = None,
35873599
goal_features_map: t.Optional[Mapping] = None,
@@ -3604,7 +3616,7 @@ def react_aggregate(
36043616
value_robust_contributions_features: t.Optional[Collection[str]] = None,
36053617
value_robust_contributions_num_buckets: int = 30,
36063618
value_robust_contributions_min_samples: int = 15,
3607-
value_robust_contributions_min_cases: int = 15,
3619+
value_robust_contributions_min_cases: int | dict[str, int] = 15,
36083620
weight_feature: t.Optional[str] = None,
36093621
) -> AggregateReaction:
36103622
"""
@@ -3811,6 +3823,10 @@ def react_aggregate(
38113823
not providing this feature will return a matrix where each feature is used as an action feature. However,
38123824
providing this feature if 'feature_robust_accuracy_contributions' is selected is still accepted, and will
38133825
return just the feature influences for the selected feature.
3826+
filter_fanout_values : bool, default False
3827+
When true, predictions of features with fanned out values will be
3828+
made while holding out other cases that had the same values
3829+
duplicated.
38143830
forecast_window_length : float, optional
38153831
A value specifying a length of time over which to measure the accuracy of forecasts. When
38163832
specified, returned prediction statistics and full residuals will be measuring the accuracy
@@ -3941,11 +3957,12 @@ def react_aggregate(
39413957
The minimum number of samples required for a combination of feature values for its
39423958
aggregated measure to be returned when computing the "value_robust_accuracy_contributions",
39433959
"value_robust_prediction_contributions" or "value_robust_surprisal_asymmetry" details.
3944-
value_robust_contributions_min_cases: int, default 15
3960+
value_robust_contributions_min_cases: int or map of str to int, default 15
39453961
The minimum number of unique cases for a given nominal class or continuous bucket to be
39463962
used as a possible feature value when collecting all combinations of feature values in
3947-
the data to report metrics over. If unspecified, there is no filtering based on number
3948-
of unique cases.
3963+
the data to report metrics over. May be specified as a single value or a mapping of feature names to
3964+
values defining individual thresholds for each feature. If defined as a mapping, then any features without
3965+
defined thresholds will use a default value of 15.
39493966
weight_feature : str, optional
39503967
The name of feature whose values to use as case weights.
39513968
When left unspecified uses the internally managed case weight.
@@ -3967,6 +3984,7 @@ def react_aggregate(
39673984
convergence_threshold=convergence_threshold,
39683985
features_to_derive=features_to_derive,
39693986
feature_influences_action_feature=feature_influences_action_feature,
3987+
filter_fanout_values=filter_fanout_values,
39703988
forecast_window_length=forecast_window_length,
39713989
goal_dependent_features=goal_dependent_features,
39723990
goal_features_map=goal_features_map,

howso/utilities/feature_attributes/base.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,7 @@ def _process(self, # noqa: C901
763763
datetime_feature_formats: t.Optional[dict] = None,
764764
default_time_zone: t.Optional[str] = None,
765765
dependent_features: t.Optional[dict[str, list[str]]] = None,
766+
fanout_feature_map: t.Optional[dict[tuple[str] | str, list[str]]] = None,
766767
id_feature_name: t.Optional[str | Iterable[str]] = None,
767768
include_extended_nominal_probabilities: t.Optional[bool] = False,
768769
include_sample: bool = False,
@@ -1108,14 +1109,23 @@ def _process(self, # noqa: C901
11081109
# Validate datetimes after any user-defined features have been re-implemented
11091110
self._validate_date_times()
11101111

1112+
# Configure the fanout feature attributes according to the input if given.
1113+
if fanout_feature_map:
1114+
for key_features, fanout_features in fanout_feature_map.items():
1115+
if isinstance(key_features, str):
1116+
key_features = [key_features]
1117+
for f in fanout_features:
1118+
if f in self.attributes:
1119+
self.attributes[f]['fanout_on'] = list(key_features)
1120+
11111121
# Re-order the keys like the original dataframe
11121122
ordered_attributes = {}
11131123
for fname in self.data.columns:
11141124
# Check to see if the key is a sqlalchemy Column
11151125
if hasattr(fname, 'name'):
11161126
fname = fname.name
11171127
if fname not in self.attributes.keys():
1118-
warnings.warn(f'Feature {fname} exists in provided data but was not computed in feature attributes')
1128+
warnings.warn(f'Feature {fname} exists in provided data but was not computed in feature attributes.')
11191129
continue
11201130
ordered_attributes[fname] = self.attributes[fname]
11211131

howso/utilities/feature_attributes/infer_feature_attributes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ def infer_feature_attributes(data: pd.DataFrame | SQLRelationalDatastoreProtocol
119119
to 2 will synthesize the 3rd order derivative value, and then use
120120
that synthed value to derive the 2nd and 1st order.
121121
122+
fanout_feature_map : dict of str or tuple of str to list of str, optional
123+
(Optional) Dict mapping "key" feature names or tuples of "key" feature names to list of "fanout" feature names.
124+
Fanout features are features with values fanned out across multiple cases. Key features are features
125+
whose values can be used to select groups of cases that have the same duplicated fanout values.
126+
122127
id_feature_name : str or list of str, default None
123128
(Optional) The name(s) of the ID feature(s).
124129

howso/utilities/feature_attributes/time_series.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ def _process( # noqa: C901
330330
delta_boundaries: t.Optional[dict] = None,
331331
dependent_features: t.Optional[dict] = None,
332332
derived_orders: t.Optional[dict] = None,
333+
fanout_feature_map: t.Optional[dict[str | tuple[str], list[str]]] = None,
333334
id_feature_name: t.Optional[str | Iterable[str]] = None,
334335
include_extended_nominal_probabilities: t.Optional[bool] = False,
335336
include_sample: bool = False,
@@ -448,6 +449,11 @@ def _process( # noqa: C901
448449
to 2 will synthesize the 3rd order derivative value, and then use
449450
that synthed value to derive the 2nd and 1st order.
450451
452+
fanout_feature_map : dict of str or tuple of str to list of str, optional
453+
(Optional) Dict mapping "key" feature names or tuples of "key" feature names to list of "fanout" feature names.
454+
Fanout features are features with values fanned out across multiple cases. Key features are features
455+
whose values can be used to select groups of cases that have the same duplicated fanout values.
456+
451457
id_feature_name : str or list of str, default None
452458
(Optional) The name(s) of the ID feature(s).
453459
@@ -633,6 +639,7 @@ def _process( # noqa: C901
633639
datetime_feature_formats=datetime_feature_formats,
634640
default_time_zone=default_time_zone,
635641
dependent_features=dependent_features,
642+
fanout_feature_map=fanout_feature_map,
636643
id_feature_name=id_feature_name,
637644
include_extended_nominal_probabilities=include_extended_nominal_probabilities,
638645
include_sample=include_sample,

version.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"dependencies": {
3-
"howso-engine": "110.3.0"
3+
"howso-engine": "110.5.0"
44
}
55
}

0 commit comments

Comments
 (0)