Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 0 additions & 86 deletions howso/client/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1440,89 +1440,3 @@ def test_tokenizable_strings_reaction(self):
)
assert reaction["action"].iloc[0]["review"] == df.iloc[0]["review"]
assert reaction["details"]["influential_cases"][0].iloc[0]["review"] == df.iloc[0]["review"]

def test_json_feature_types(self):
    """Test that JSON features stored as Python data structures have their primitive types maintained."""
    # Each entry pairs a raw JSON-like case with the primitive-type schema IFA should infer for it.
    tests = [
        ({"a": "str", "b": "1", "c": "2.7", "d": True, "e": {"a1": "str", "b1": {"c1": [1, 2, 3]}}},
         {"a": "string", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "integer"}}}),
        ({"a": "str", "b": "1", "c": "3.3", "d": False, "e": {"a1": "str2", "b1": {"c1": [1, 2, 3, 4, 5, 6, 7]}}},
         {"a": "string", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "integer"}}}),
        ({"a": 3, "b": 1.5, "c": 2.7, "d": True, "e": {"a1": 5, "b1": {"c1": [1, 2, 3]}}},
         {"a": "integer", "b": "numeric", "c": "numeric", "d": "boolean", "e": {"a1": "integer", "b1": {"c1": "integer"}}}),
        ({"a": 3, "b": 1, "c": 2.7, "d": True, "e": {"a1": "str", "b1": {"c1": [1, True, "foo"]}}},
         {"a": "integer", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "object"}}}),
    ]
    data_uniform_types = pd.DataFrame({"foo": [tests[0][0], tests[1][0]], "bar": ["a", "b"]})
    data_uniform_except_list = pd.DataFrame({"foo": [tests[2][0], tests[3][0]], "bar": ["a", "b"]})
    data_non_uniform = pd.DataFrame({"foo": [tests[0][0], tests[1][0], tests[2][0]], "bar": ["a", "b", "c"]})

    def _assert_json_attributes(feature_attributes):
        """Assert 'foo' was inferred as a continuous JSON container feature."""
        assert feature_attributes["foo"]["data_type"] == "json"
        assert feature_attributes["foo"]["type"] == "continuous"
        assert feature_attributes["foo"]["original_type"]["data_type"] == "container"

    def _train_and_react(feature_attributes, data, context_value):
        """Train a fresh trainee on `data`, then react to one 'bar' context requesting 'foo'."""
        client = HowsoClient()
        t = Trainee()
        client.set_feature_attributes(t.id, feature_attributes)
        client.train(t.id, data)
        return client.react(
            t.id,
            contexts=[[context_value]],
            context_features=['bar'],
            action_features=['foo'],
            details={"influential_cases": True},
            desired_conviction=5,
        )

    # Types should be preserved with no warnings (dict)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        feature_attributes = infer_feature_attributes(data_uniform_types)
    _assert_json_attributes(feature_attributes)
    reaction = _train_and_react(feature_attributes, data_uniform_types, "a")
    # Uniform schema: the reacted case should round-trip with original Python types intact.
    assert reaction["action"].iloc[0]["foo"] == data_uniform_types.iloc[0]["foo"]
    assert reaction["details"]["influential_cases"][0].iloc[0]["foo"] == tests[0][0]

    # All types except for the nested list should be preserved and a warning issued
    with pytest.warns(match="contains a key 'c1' whose value is a list of mixed types"):
        feature_attributes = infer_feature_attributes(data_uniform_except_list)
    _assert_json_attributes(feature_attributes)
    reaction = _train_and_react(feature_attributes, data_uniform_except_list, "b")
    expected_case = deepcopy(tests[3][0])
    # The list under this key has mixed types so it will come back as-is when deserialized
    expected_case["e"]["b1"]["c1"] = json.loads(json.dumps(expected_case["e"]["b1"]["c1"]))
    assert reaction["action"].iloc[0]["foo"] == expected_case
    assert reaction["details"]["influential_cases"][0].iloc[0]["foo"] == expected_case

    # Types cannot be preserved, warning issued
    with pytest.warns(match="inconsistent types and/or keys across cases."):
        feature_attributes = infer_feature_attributes(data_non_uniform)
    _assert_json_attributes(feature_attributes)
    reaction = _train_and_react(feature_attributes, data_non_uniform, "a")
    # Cases of "foo" have mixed types so they will come back as-is when deserialized
    expected_case = json.loads(json.dumps(tests[0][0]))
    assert reaction["action"].iloc[0]["foo"] == expected_case
    assert reaction["details"]["influential_cases"][0].iloc[0]["foo"] == expected_case
4 changes: 0 additions & 4 deletions howso/utilities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
SingleTableFeatureAttributes,
)
from .features import ( # noqa: F401
cast_primitive_from_feature_type,
convert_primitive_to_feature_type,
deserialize_cases,
FeatureType,
format_column,
Expand Down Expand Up @@ -65,8 +63,6 @@
"align_data",
"build_react_series_df",
"check_feature_names",
"cast_primitive_from_feature_type",
"convert_primitive_to_feature_type",
"format_confusion_matrix",
"date_format_is_iso",
"date_to_epoch",
Expand Down
73 changes: 2 additions & 71 deletions howso/utilities/feature_attributes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pandas as pd
import yaml

from howso.utilities.features import convert_primitive_to_feature_type, FeatureType
from howso.utilities.features import FeatureType
from howso.utilities.utilities import is_valid_datetime_format, time_to_seconds
from ..utilities import determine_iso_format

Expand Down Expand Up @@ -1148,74 +1148,6 @@ def _infer_boolean_attributes(self, feature_name: str) -> dict:
def _infer_integer_attributes(self, feature_name: str) -> dict:
"""Get inferred attributes for the given integer column."""

def _get_primitive_type_schema(self, feature_name: str) -> dict | bool | None:  # noqa: C901
    """
    Get a map of keys to types for a JSON feature stored as a Python dict or list.

    Returns False when the feature has no non-null data, and None when the
    sampled cases disagree on keys and/or types (a warning is issued). Otherwise
    returns the schema inferred from the first sampled case.
    """
    # If there is no data, return False
    first_non_none = self._get_first_non_null(feature_name)
    if first_non_none is None:
        return False

    # Keep track of whether there are any non-primitive types in the data
    has_complex_type = False

    def _recursive_get_types(data: t.Any, key: str = None) -> t.Any:
        """Recursively determine primitive types for an arbitrary Python data structure."""
        nonlocal has_complex_type
        # Value is a list
        if isinstance(data, MutableSequence):
            list_type = FeatureType.UNKNOWN.value
            # Iterate through 10 random values of a list of len>10, or through the entire list if len<=10.
            iterations = min(len(data), 10)
            if len(data) > 10:
                # Shuffle data so that the first 10 indices are randomized
                data = list(pd.Series(data).sample(frac=1))
            for idx in range(iterations):
                rand_val = data[idx]
                if rand_val is None:
                    # We can still retain primitive types with NoneTypes present in the data structure
                    continue
                value_type = convert_primitive_to_feature_type(rand_val)
                if value_type == FeatureType.UNKNOWN.value:
                    # A non-primitive type was found in the data
                    has_complex_type = True
                if list_type == FeatureType.UNKNOWN.value:
                    list_type = value_type
                elif list_type != value_type:
                    warnings.warn(f"JSON feature '{feature_name}' contains a key '{key}' whose value is a list of "
                                  "mixed types. Original types under this key will not be preserved.")
                    return FeatureType.UNKNOWN.value
            return list_type
        # Base case: not a list or dict
        elif not isinstance(data, Mapping):
            return convert_primitive_to_feature_type(data)
        # Value is a dict
        return {key: _recursive_get_types(data[key], key=key) for key in data.keys()}

    # Sample up to 10 random values
    # OR every value if < 10
    type_maps = []
    if (count := self._get_unique_count(feature_name)) < 10:
        for sample in self._get_unique_values(feature_name):
            type_maps.append(_recursive_get_types(sample))
    else:
        count = 10
        for idx in range(10):
            sample = self._get_random_value(feature_name, no_nulls=True)
            type_maps.append(_recursive_get_types(sample))

    # Issue a warning if keys or types are not consistent across cases
    for idx in range(1, count):
        if type_maps[0] != type_maps[idx]:
            # NOTE: tests match on the "inconsistent types and/or keys across cases." suffix.
            warnings.warn(f"JSON feature '{feature_name}' has inconsistent types and/or keys across cases. "
                          "Original types will not be preserved.")
            return None

    # Issue a warning if any non-primitive types were found
    if has_complex_type:
        warnings.warn(f"JSON feature '{feature_name}' contains at least one instance of a non-primitive type. "
                      "Only uniform, primitive types will be preserved in semistructured features.")

    return type_maps[0]

def _infer_string_attributes(self, feature_name: str) -> dict:
"""Get inferred attributes for the given string column."""
# Column has arbitrary string values, first check if they
Expand All @@ -1239,11 +1171,10 @@ def _infer_string_attributes(self, feature_name: str) -> dict:
elif self._is_json_feature(feature_name):
first_non_null = self._get_first_non_null(feature_name)
if isinstance(first_non_null, Mapping) or isinstance(first_non_null, MutableSequence):
type_map = self._get_primitive_type_schema(feature_name) or {}
return {
"type": "continuous",
"data_type": "json",
"original_type": {"type_map": type_map, "data_type": FeatureType.CONTAINER.value},
"original_type": {"data_type": FeatureType.CONTAINER.value},
}
return {
"type": "continuous",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1084,44 +1084,3 @@ def test_infer_tokenizable_string():
# Product should still be a nominal string
assert feature_attributes["product"]["data_type"] == "string"
assert feature_attributes["product"]["type"] == "nominal"


def test_json_features_types():
    """Test that IFA includes type information for JSON features that are Python dicts/lists."""
    tests = [
        ({"a": "str", "b": 1, "c": 2.7, "d": True, "e": {"a1": "str", "b1": {"c1": [1, 2, 3]}}},
         {"a": "string", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "integer"}}}),
        ({"a": "str", "b": 9, "c": 3.3, "d": False, "e": {"a1": "str2", "b1": {"c1": [1, 2, 3, 4, 5, 6, 7]}}},
         {"a": "string", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "integer"}}}),
        ({"a": 3, "b": 1.5, "c": 2.7, "d": True, "e": {"a1": 5, "b1": {"c1": [1, 2, 3]}}},
         {"a": "integer", "b": "numeric", "c": "numeric", "d": "boolean", "e": {"a1": "integer", "b1": {"c1": "integer"}}}),
        ({"a": 3, "b": 1, "c": 2.7, "d": True, "e": {"a1": "str", "b1": {"c1": [1, True, "foo"]}}},
         {"a": "integer", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "object"}}}),
        ([1.1, 2.2, 3.3, 4.4], "numeric"),
    ]
    # First verify each case independently yields the expected type map.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for case, expected_map in tests:
            attributes = infer_feature_attributes(pd.DataFrame({"foo": [case]}))
            foo_attrs = attributes["foo"]
            assert foo_attrs["data_type"] == "json"
            assert foo_attrs["original_type"]["data_type"] == FeatureType.CONTAINER.value
            assert foo_attrs["original_type"]["type_map"] == expected_map
    # Multiple cases sharing one schema must infer cleanly with zero warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        attributes = infer_feature_attributes(pd.DataFrame({"foo": [tests[0][0], tests[1][0]]}))
        assert attributes["foo"]["data_type"] == "json"
        assert attributes["foo"]["original_type"]["data_type"] == FeatureType.CONTAINER.value
        assert attributes["foo"]["original_type"]["type_map"] == tests[0][1]

    # Now exercise the warning paths.
    with pytest.warns(match="contains a key 'c1' whose value is a list of mixed types"):
        infer_feature_attributes(pd.DataFrame({"foo": [tests[3][0]]}))
    with pytest.warns(match="inconsistent types and/or keys across cases."):
        attributes = infer_feature_attributes(pd.DataFrame({"foo": [tests[0][0], tests[2][0], tests[1][0]]}))
        assert not attributes["foo"]["original_type"]["type_map"]
Original file line number Diff line number Diff line change
Expand Up @@ -602,23 +602,3 @@ def test_infer_tokenizable_string(adc):
# Product should still be a nominal string
assert feature_attributes["product"]["data_type"] == "string"
assert feature_attributes["product"]["type"] == "nominal"


@pytest.mark.parametrize('adc', [
    # Only MongoDBData and DataFrameData support Python objects as data
    ("MongoDBData", pd.DataFrame()),
    ("DataFrameData", pd.DataFrame()),
], indirect=True)
def test_json_features_types(adc):
    """Test that IFA includes type information for JSON features that are Python dicts/lists for applicable ADCs."""
    # One representative case plus the primitive-type schema IFA should infer for it.
    case, expected_map = (
        {"a": "str", "b": 1, "c": 2.7, "d": True, "e": {"a1": "str", "b1": {"c1": [1, 2, 3]}}},
        {"a": "string", "b": "integer", "c": "numeric", "d": "boolean", "e": {"a1": "string", "b1": {"c1": "integer"}}}
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        convert_data(DataFrameData(pd.DataFrame({"foo": [case]})), adc)
        attributes = infer_feature_attributes(adc)
        assert attributes["foo"]["data_type"] == "json"
        assert attributes["foo"]["original_type"]["type_map"] == expected_map
34 changes: 0 additions & 34 deletions howso/utilities/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@


__all__ = [
"cast_primitive_from_feature_type",
"convert_primitive_to_feature_type",
"FeatureSerializer",
"FeatureType",
"deserialize_cases",
Expand Down Expand Up @@ -67,38 +65,6 @@ def __str__(self):
"""Return a string representation."""
return str(self.value)


def cast_primitive_from_feature_type(data: int | float | str | bool, new_type: str):
    """
    Cast a primitive value to the provided FeatureType value if it does not match.

    Best-effort: if the cast fails or `new_type` is unrecognized, the value is
    returned unchanged rather than raising.
    """
    try:
        if new_type == FeatureType.STRING.value and not isinstance(data, str):
            return str(data)
        elif new_type == FeatureType.BOOLEAN.value and not isinstance(data, bool):
            return bool(data)
        elif new_type == FeatureType.INTEGER.value and not isinstance(data, int):
            return int(data)
        elif new_type in (FeatureType.FLOAT.value, FeatureType.NUMERIC.value) and not isinstance(data, float):
            # `convert_primitive_to_feature_type` maps floats to NUMERIC, so type maps
            # built there carry NUMERIC rather than FLOAT; accept both spellings.
            return float(data)
    except Exception:  # noqa: Intentionally broad
        # This is a QoL operation and it should not stop execution if there is a problem
        pass
    return data


def convert_primitive_to_feature_type(value: t.Any) -> str:
    """Convert a primitive value's data type to FeatureType. Returns 'object' if not primitive."""
    # bool is checked before int deliberately: bool is a subclass of int.
    if isinstance(value, str):
        return FeatureType.STRING.value
    elif isinstance(value, bool):
        return FeatureType.BOOLEAN.value
    elif isinstance(value, int):
        return FeatureType.INTEGER.value
    elif isinstance(value, float):
        return FeatureType.NUMERIC.value
    # A non-primitive type. Return the `.value` string (previously the enum member
    # itself was returned, inconsistent with every other branch and with the
    # docstring, breaking string comparisons in callers).
    return FeatureType.UNKNOWN.value


class FeatureSerializer:
"""Adapter for serialization and deserialization of feature data."""

Expand Down
16 changes: 1 addition & 15 deletions howso/utilities/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,18 +808,7 @@ def stringify_json(cases: list[list[t.Any]], features: Iterable[str], feature_at
case_group[idx] = json.dumps(case_group[idx])


def _convert_json_subtypes(data: t.Any, type_map: dict[str, t.Any] | t.Any):
    """Recursively convert primitive types according to the type map for an arbitrary Python data structure."""
    # Avoid circular import
    from .features import cast_primitive_from_feature_type
    if isinstance(data, list):
        # For a list, the map entry is a single element type shared by every item.
        return [cast_primitive_from_feature_type(item, type_map) for item in data]
    if isinstance(data, Mapping):
        # Recurse per key; keys absent from the map fall back to "object" (no cast).
        return {
            name: _convert_json_subtypes(data[name], type_map.get(name, "object"))
            for name in data.keys()
        }
    # Leaf value: cast directly against its mapped type.
    return cast_primitive_from_feature_type(data, type_map)


def destringify_json(cases: pd.Series, feature_attributes: Mapping) -> None:
def destringify_json(cases: pd.Series, feature_attributes: Mapping) -> pd.Series:
"""
Ensures that any JSON features have their cases destringified.

Expand All @@ -833,9 +822,6 @@ def destringify_json(cases: pd.Series, feature_attributes: Mapping) -> None:
destringified_cases = []
for case_to_destringify in cases:
formatted_case = json.loads(case_to_destringify)
type_map = feature_attributes.get("original_type", {}).get("type_map")
if type_map:
formatted_case = _convert_json_subtypes(formatted_case, type_map)
destringified_cases.append(formatted_case)
return pd.Series(destringified_cases)

Expand Down
Loading