25031: Adds support for fanout features to infer_feature_attributes and the new 'filter_fanout_values' flags on react endpoints, MINOR (#593)

cademack · web-flow · commit f312459ffbbb · 2026-02-10T14:18:41.000-05:00
diff --git a/howso/client/base.py b/howso/client/base.py
@@ -1681,6 +1681,7 @@ def react(  # noqa: C901
         feature_bounds_map: t.Optional[Mapping] = None,
         feature_pre_process_code_map: t.Optional[Mapping] = None,
         feature_post_process_code_map: t.Optional[Mapping] = None,
+        filter_fanout_values: bool = False,
         generate_new_cases: GenerateNewCases = "no",
         goal_features_map: t.Optional[Mapping] = None,
         initial_batch_size: t.Optional[int] = None,
@@ -2202,7 +2203,10 @@ def react(  # noqa: C901
             resulting value will be used as part of the context for following
             action features. The custom code will have access to all context
             feature values and previously generated action feature values.
-
+        filter_fanout_values : bool, default False
+            When true, predictions of features with fanned out values will be
+            made while holding out other cases that had the same values
+            duplicated.
         generate_new_cases : {"always", "attempt", "no"}, default "no"
             (Optional) Whether to generate new cases.
 
@@ -2411,6 +2415,7 @@ def react(  # noqa: C901
                 "derived_action_features": derived_action_features,
                 "feature_pre_process_code_map": feature_pre_process_code_map,
                 "feature_post_process_code_map": feature_post_process_code_map,
+                "filter_fanout_values": filter_fanout_values,
                 "goal_features_map": goal_features_map,
                 "post_process_features": post_process_features,
                 "post_process_values": post_process_values,
@@ -2457,6 +2462,7 @@ def react(  # noqa: C901
                 "derived_action_features": derived_action_features,
                 "feature_pre_process_code_map": feature_pre_process_code_map,
                 "feature_post_process_code_map": feature_post_process_code_map,
+                "filter_fanout_values": filter_fanout_values,
                 "post_process_features": post_process_features,
                 "post_process_values": post_process_values,
                 "use_differential_privacy": use_differential_privacy,
@@ -2764,6 +2770,7 @@ def react_series(  # noqa: C901
         exclude_novel_nominals_from_uniqueness_check: bool = False,
         feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
         feature_post_process_code_map: t.Optional[Mapping] = None,
+        filter_fanout_values: bool = False,
         final_time_steps: t.Optional[list[t.Any]] = None,
         generate_new_cases: GenerateNewCases = "no",
         goal_features_map: t.Optional[Mapping] = None,
@@ -2897,6 +2904,10 @@ def react_series(  # noqa: C901
             feature values and previously generated action feature values of
             the timestep being generated, as well as the feature values of all
             previously generated timesteps.
+        filter_fanout_values : bool, default False
+            When true, predictions of features with fanned out values will be
+            made while holding out other cases that had the same values
+            duplicated.
         series_context_features : iterable of str, optional
             List of context features corresponding to ``series_context_values``.
         series_context_values : list of list of list of object or list of DataFrame, optional
@@ -3130,6 +3141,7 @@ def react_series(  # noqa: C901
                 "constraints": constraints,
                 "continue_series": continue_series,
                 "feature_post_process_code_map": feature_post_process_code_map,
+                "filter_fanout_values": filter_fanout_values,
                 "final_time_steps": final_time_steps,
                 "init_time_steps": init_time_steps,
                 "series_stop_maps": series_stop_maps,
@@ -3183,6 +3195,7 @@ def react_series(  # noqa: C901
                 "constraints": constraints,
                 "continue_series": continue_series,
                 "feature_post_process_code_map": feature_post_process_code_map,
+                "filter_fanout_values": filter_fanout_values,
                 "final_time_steps": final_time_steps,
                 "init_time_steps": init_time_steps,
                 "series_stop_maps": series_stop_maps,
@@ -3689,6 +3702,7 @@ def react_aggregate(  # noqa: C901
         convergence_threshold: t.Optional[float] = None,
         features_to_derive: t.Optional[Collection[str]] = None,
         feature_influences_action_feature: t.Optional[str] = None,
+        filter_fanout_values: bool = False,
         forecast_window_length: t.Optional[float] = None,
         goal_dependent_features: t.Optional[Collection[str]] = None,
         goal_features_map: t.Optional[Mapping] = None,
@@ -3711,7 +3725,7 @@ def react_aggregate(  # noqa: C901
         value_robust_contributions_features: t.Optional[Collection[str]] = None,
         value_robust_contributions_num_buckets: int = 30,
         value_robust_contributions_min_samples: int = 15,
-        value_robust_contributions_min_cases: int = 15,
+        value_robust_contributions_min_cases: int | dict[str, int] = 15,
         weight_feature: t.Optional[str] = None,
     ) -> dict[str, dict[str, t.Any]]:
         """
@@ -3914,6 +3928,10 @@ def react_aggregate(  # noqa: C901
         feature_influences_action_feature : str, optional
             When computing feature influences such as accuracy and prediction contributions, use this feature as
             the action feature.  If feature influences ``details`` are selected, this feature must be provided.
+        filter_fanout_values : bool, default False
+            When true, predictions of features with fanned out values will be
+            made while holding out other cases that had the same values
+            duplicated.
         forecast_window_length : float, optional
             A value specifing a length of time over which to measure the accuracy of forecasts. When
             specified, returned prediction statistics and full residuals will be measuring the accuracy
@@ -4044,11 +4062,12 @@ def react_aggregate(  # noqa: C901
             The minumum number of samples required for a combination of feature values for its
             aggregated measure to be returned when computing the "value_robust_accuracy_contributions",
             "value_robust_prediction_contributions" or "value_robust_surprisal_asymmetry" details.
-        value_robust_contributions_min_cases: int, default 15
+        value_robust_contributions_min_cases: int or map of str to int, default 15
             The minimum number of unique cases for a given nominal class or continuous bucket to be
             used as a possible feature value when collecting all combinations of feature values in
-            the data to report metrics over. If unspecified, there is no filtering based on number
-            of unique cases.
+            the data to report metrics over. May be specified as a single value or a mapping of feature names to
+            values defining individual thresholds for each feature. If defined as a mapping, then any features without
+            defined thresholds will use a default value of 15.
         weight_feature : str, optional
             The name of feature whose values to use as case weights.
             When left unspecified uses the internally managed case weight.
@@ -4107,6 +4126,7 @@ def react_aggregate(  # noqa: C901
             "convergence_threshold": convergence_threshold,
             "features_to_derive": features_to_derive,
             "feature_influences_action_feature": feature_influences_action_feature,
+            "filter_fanout_values": filter_fanout_values,
             "forecast_window_length": forecast_window_length,
             "goal_dependent_features": goal_dependent_features,
             "goal_features_map": goal_features_map,
diff --git a/howso/client/typing.py b/howso/client/typing.py
@@ -242,7 +242,6 @@ class FeatureTimeSeries(TypedDict, total=False):
     the default.
     """
 
-
 class FeatureAttributes(TypedDict):
     """
     Attributes for a single feature.
@@ -344,6 +343,14 @@ class FeatureAttributes(TypedDict):
     on values based on other multi-type value features.
     """
 
+    fanout_on: NotRequired[list[str]]
+    """
+    Features whose values can be used to select other cases that have the same
+    duplicated value for this fan-out feature.
+
+    Should be used when this is a fan-out feature.
+    """
+
     derived_feature_code: NotRequired[str]
     """
     Code defining how to derive this feature's value.
diff --git a/howso/engine/trainee.py b/howso/engine/trainee.py
@@ -1255,6 +1255,7 @@ def react(
         feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
         feature_pre_process_code_map: t.Optional[Mapping] = None,
         feature_post_process_code_map: t.Optional[Mapping] = None,
+        filter_fanout_values: bool = False,
         generate_new_cases: GenerateNewCases = "no",
         goal_features_map: t.Optional[Mapping] = None,
         initial_batch_size: t.Optional[int] = None,
@@ -1723,7 +1724,10 @@ def react(
             resulting value will be used as part of the context for following
             action features. The custom code will have access to all context
             feature values and previously generated action feature values.
-
+        filter_fanout_values : bool, default False
+            When true, predictions of features with fanned out values will be
+            made while holding out other cases that had the same values
+            duplicated.
         generate_new_cases : {"always", "attempt", "no"}, default "no"
             This parameter takes in a string that may be one of the following:
 
@@ -1868,6 +1872,7 @@ def react(
             feature_bounds_map=feature_bounds_map,
             feature_pre_process_code_map=feature_pre_process_code_map,
             feature_post_process_code_map=feature_post_process_code_map,
+            filter_fanout_values=filter_fanout_values,
             generate_new_cases=generate_new_cases,
             goal_features_map=goal_features_map,
             initial_batch_size=initial_batch_size,
@@ -1906,6 +1911,7 @@ def react_series(
         exclude_novel_nominals_from_uniqueness_check: bool = False,
         feature_bounds_map: t.Optional[Mapping[str, Mapping[str, t.Any]]] = None,
         feature_post_process_code_map: t.Optional[Mapping] = None,
+        filter_fanout_values: bool = False,
         final_time_steps: t.Optional[list[t.Any]] = None,
         generate_new_cases: GenerateNewCases = "no",
         goal_features_map: t.Optional[Mapping] = None,
@@ -2017,6 +2023,10 @@ def react_series(
             feature values and previously generated action feature values of
             the time-step being generated, as well as the feature values of all
             previously generated time-steps.
+        filter_fanout_values : bool, default False
+            When true, predictions of features with fanned out values will be
+            made while holding out other cases that had the same values
+            duplicated.
         final_time_steps: list of object, optional
             The time steps at which to end synthesis. Time-series only.
             Time-series only. Must provide either one for all series, or
@@ -2157,6 +2167,7 @@ def react_series(
                 exclude_novel_nominals_from_uniqueness_check=exclude_novel_nominals_from_uniqueness_check,
                 feature_bounds_map=feature_bounds_map,
                 feature_post_process_code_map=feature_post_process_code_map,
+                filter_fanout_values=filter_fanout_values,
                 final_time_steps=final_time_steps,
                 generate_new_cases=generate_new_cases,
                 goal_features_map=goal_features_map,
@@ -3582,6 +3593,7 @@ def react_aggregate(
         convergence_threshold: t.Optional[float] = None,
         features_to_derive: t.Optional[Collection[str]] = None,
         feature_influences_action_feature: t.Optional[str] = None,
+        filter_fanout_values: bool = False,
         forecast_window_length: t.Optional[float] = None,
         goal_dependent_features: t.Optional[Collection[str]] = None,
         goal_features_map: t.Optional[Mapping] = None,
@@ -3604,7 +3616,7 @@ def react_aggregate(
         value_robust_contributions_features: t.Optional[Collection[str]] = None,
         value_robust_contributions_num_buckets: int = 30,
         value_robust_contributions_min_samples: int = 15,
-        value_robust_contributions_min_cases: int = 15,
+        value_robust_contributions_min_cases: int | dict[str, int] = 15,
         weight_feature: t.Optional[str] = None,
     ) -> AggregateReaction:
         """
@@ -3811,6 +3823,10 @@ def react_aggregate(
             not providing this feature will return a matrix where each feature is used as an action feature. However,
             providing this feature if 'feature_robust_accuracy_contributions' is selected is still accepted, and will
             return just the feature influences for the selected feature.
+        filter_fanout_values : bool, default False
+            When true, predictions of features with fanned out values will be
+            made while holding out other cases that had the same values
+            duplicated.
         forecast_window_length : float, optional
             A value specifying a length of time over which to measure the accuracy of forecasts. When
             specified, returned prediction statistics and full residuals will be measuring the accuracy
@@ -3941,11 +3957,12 @@ def react_aggregate(
             The minumum number of samples required for a combination of feature values for its
             aggregated measure to be returned when computing the "value_robust_accuracy_contributions",
             "value_robust_prediction_contributions" or "value_robust_surprisal_asymmetry" details.
-        value_robust_contributions_min_cases: int, default 15
+        value_robust_contributions_min_cases: int or map of str to int, default 15
             The minimum number of unique cases for a given nominal class or continuous bucket to be
             used as a possible feature value when collecting all combinations of feature values in
-            the data to report metrics over. If unspecified, there is no filtering based on number
-            of unique cases.
+            the data to report metrics over. May be specified as a single value or a mapping of feature names to
+            values defining individual thresholds for each feature. If defined as a mapping, then any features without
+            defined thresholds will use a default value of 15.
         weight_feature : str, optional
             The name of feature whose values to use as case weights.
             When left unspecified uses the internally managed case weight.
@@ -3967,6 +3984,7 @@ def react_aggregate(
                 convergence_threshold=convergence_threshold,
                 features_to_derive=features_to_derive,
                 feature_influences_action_feature=feature_influences_action_feature,
+                filter_fanout_values=filter_fanout_values,
                 forecast_window_length=forecast_window_length,
                 goal_dependent_features=goal_dependent_features,
                 goal_features_map=goal_features_map,
diff --git a/howso/utilities/feature_attributes/base.py b/howso/utilities/feature_attributes/base.py
@@ -763,6 +763,7 @@ def _process(self,  # noqa: C901
                  datetime_feature_formats: t.Optional[dict] = None,
                  default_time_zone: t.Optional[str] = None,
                  dependent_features: t.Optional[dict[str, list[str]]] = None,
+                 fanout_feature_map: t.Optional[dict[tuple[str] | str, list[str]]] = None,
                  id_feature_name: t.Optional[str | Iterable[str]] = None,
                  include_extended_nominal_probabilities: t.Optional[bool] = False,
                  include_sample: bool = False,
@@ -1108,14 +1109,23 @@ def _process(self,  # noqa: C901
         # Validate datetimes after any user-defined features have been re-implemented
         self._validate_date_times()
 
+        # Configure the fanout feature attributes according to the input if given.
+        if fanout_feature_map:
+            for key_features, fanout_features in fanout_feature_map.items():
+                if isinstance(key_features, str):
+                    key_features = [key_features]
+                for f in fanout_features:
+                    if f in self.attributes:
+                        self.attributes[f]['fanout_on'] = list(key_features)
+
         # Re-order the keys like the original dataframe
         ordered_attributes = {}
         for fname in self.data.columns:
             # Check to see if the key is a sqlalchemy Column
             if hasattr(fname, 'name'):
                 fname = fname.name
             if fname not in self.attributes.keys():
-                warnings.warn(f'Feature {fname} exists in provided data but was not computed in feature attributes')
+                warnings.warn(f'Feature {fname} exists in provided data but was not computed in feature attributes.')
                 continue
             ordered_attributes[fname] = self.attributes[fname]
 
diff --git a/howso/utilities/feature_attributes/infer_feature_attributes.py b/howso/utilities/feature_attributes/infer_feature_attributes.py
@@ -119,6 +119,11 @@ def infer_feature_attributes(data: pd.DataFrame | SQLRelationalDatastoreProtocol
         to 2 will synthesize the 3rd order derivative value, and then use
         that synthed value to derive the 2nd and 1st order.
 
+    fanout_feature_map : dict of str or tuple of str to list of str, optional
+        (Optional) Dict mapping "key" feature names or tuples of "key" feature names to list of "fanout" feature names.
+        Fanout features are features with values fanned out across multiple cases. Key features are features
+        whose values can be used to select groups of cases that have the same duplicated fanout values.
+
     id_feature_name : str or list of str, default None
         (Optional) The name(s) of the ID feature(s).
 
diff --git a/howso/utilities/feature_attributes/time_series.py b/howso/utilities/feature_attributes/time_series.py
@@ -330,6 +330,7 @@ def _process(  # noqa: C901
         delta_boundaries: t.Optional[dict] = None,
         dependent_features: t.Optional[dict] = None,
         derived_orders: t.Optional[dict] = None,
+        fanout_feature_map: t.Optional[dict[str | tuple[str], list[str]]] = None,
         id_feature_name: t.Optional[str | Iterable[str]] = None,
         include_extended_nominal_probabilities: t.Optional[bool] = False,
         include_sample: bool = False,
@@ -448,6 +449,11 @@ def _process(  # noqa: C901
             to 2 will synthesize the 3rd order derivative value, and then use
             that synthed value to derive the 2nd and 1st order.
 
+        fanout_feature_map : dict of str or tuple of str to list of str, optional
+            (Optional) Dict mapping "key" feature names or tuples of "key" feature names to list of "fanout" feature names.
+            Fanout features are features with values fanned out across multiple cases. Key features are features
+            whose values can be used to select groups of cases that have the same duplicated fanout values.
+
         id_feature_name : str or list of str default None
             (Optional) The name(s) of the ID feature(s).
 
@@ -633,6 +639,7 @@ def _process(  # noqa: C901
             datetime_feature_formats=datetime_feature_formats,
             default_time_zone=default_time_zone,
             dependent_features=dependent_features,
+            fanout_feature_map=fanout_feature_map,
             id_feature_name=id_feature_name,
             include_extended_nominal_probabilities=include_extended_nominal_probabilities,
             include_sample=include_sample,
diff --git a/version.json b/version.json
@@ -1,5 +1,5 @@
 {
   "dependencies": {
-    "howso-engine": "110.3.0"
+    "howso-engine": "110.5.0"
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`{`
`2`	`2`	`"dependencies": {`
`3`		`- "howso-engine": "110.3.0"`
	`3`	`+ "howso-engine": "110.5.0"`
`4`	`4`	`}`
`5`	`5`	`}`