From ffb1842fdc671c426055bbb5666ad4085ff7a53b Mon Sep 17 00:00:00 2001 From: Gene Huang Date: Tue, 16 Dec 2025 15:36:24 -0800 Subject: [PATCH] INTERNAL PiperOrigin-RevId: 845460559 --- ml_metrics/_src/utils/df_utils.py | 94 -------------------------- ml_metrics/_src/utils/df_utils_test.py | 59 ---------------- ml_metrics/utils.py | 6 -- 3 files changed, 159 deletions(-) delete mode 100644 ml_metrics/_src/utils/df_utils.py delete mode 100644 ml_metrics/_src/utils/df_utils_test.py diff --git a/ml_metrics/_src/utils/df_utils.py b/ml_metrics/_src/utils/df_utils.py deleted file mode 100644 index 60ab0908..00000000 --- a/ml_metrics/_src/utils/df_utils.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utils for generating DataFrames.""" - -from __future__ import annotations - -import collections -from collections.abc import Callable, Iterable -import copy -from typing import Any - -import chainable -from ml_metrics._src.tools.telemetry import telemetry -import pandas as pd - - -_METRIC_NAME = 'metric_name' -_SLICE = 'slice' -_VALUE = 'value' - - -def _first_or_tuple(x: tuple[Any, ...]) -> tuple[Any, ...] | Any: - if isinstance(x, tuple) and len(x) == 1: - return x[0] - return x - - -def index(a: Iterable[Any], key_fn: Callable[[Any], Any] | None = None): - if key_fn is not None: - return {key_fn(r): r for r in a} - return {i: r for i, r in enumerate(a)} - - -def merge( - a: Iterable[Any], - b: Iterable[Any], - *, - key_fn: Callable[[Any], Any], - inplace: bool = False, -) -> Iterable[Any]: - """Merges two iterables joined by the key calculated by the key_fn.""" - key2row_b = index(b, key_fn) - if not inplace: - a = [copy.copy(r) for r in a] - for r_a in a: - r_a.update(key2row_b[key_fn(r_a)].items()) - return a - - -_StrOrMetricKey = chainable.MetricKey | str - - -@telemetry.function_monitor(api='ml_metrics', category=telemetry.CATEGORY.UTIL) -def metrics_to_df(metrics: dict[_StrOrMetricKey, Any]) -> pd.DataFrame: - """Converts the aggregation result to a DataFrame. - - This always converts the dict aggregation result to a DataFrame with - the following columns: - - - metric_name: the name of the metric. - - slice: the slice of the metric, if a slice is not specified, it will be - 'overall'. - - value: the value of the metric. - - Args: - metrics: the aggregation result. - - Returns: - A DataFrame with the above columns. - """ - sliced_results = collections.defaultdict(list) - for k, v in metrics.items(): - if isinstance(k, str): - sliced_results[_METRIC_NAME].append(k) - sliced_results[_SLICE].append('overall') - sliced_results[_VALUE].append(v) - elif isinstance(k, chainable.MetricKey): - sliced_results[_METRIC_NAME].append(k.metrics) - slice_name = _first_or_tuple(k.slice.features) - slice_value = _first_or_tuple(k.slice.values) - sliced_results[_SLICE].append(f'{slice_name}={slice_value}') - sliced_results[_VALUE].append(v) - return pd.DataFrame(sliced_results) diff --git a/ml_metrics/_src/utils/df_utils_test.py b/ml_metrics/_src/utils/df_utils_test.py deleted file mode 100644 index 95cdd3e8..00000000 --- a/ml_metrics/_src/utils/df_utils_test.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from absl.testing import absltest -import chainable -from ml_metrics._src.utils import df_utils -import pandas as pd - - -class DfUtilsTest(absltest.TestCase): - - def test_as_dataframe_default(self): - key = chainable.MetricKey( - metrics='m2', slice=chainable.SliceKey(('f1',), ('a',)) - ) - agg_result = {'m1': 1, key: 2} - df = df_utils.metrics_to_df(agg_result) - pd.testing.assert_frame_equal( - df, - pd.DataFrame({ - 'metric_name': ['m1', 'm2'], - 'slice': ['overall', 'f1=a'], - 'value': [1, 2], - }), - ) - - def test_index(self): - a = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}] - expected = {1: {'a': 1, 'b': 2}, 3: {'a': 3, 'b': 4}} - self.assertEqual(df_utils.index(a, lambda x: x['a']), expected) - - def test_merge(self): - a = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}] - b = [{'a': 1, 'c': 3}, {'a': 3, 'c': 5}] - expected = [{'a': 1, 'b': 2, 'c': 3}, {'a': 3, 'b': 4, 'c': 5}] - self.assertEqual(expected, df_utils.merge(a, b, key_fn=lambda x: x['a'])) - - def test_merge_inplace(self): - a = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}] - b = [{'a': 1, 'c': 3}, {'a': 3, 'c': 5}] - expected = [{'a': 1, 'b': 2, 'c': 3}, {'a': 3, 'b': 4, 'c': 5}] - actual = df_utils.merge(a, b, key_fn=lambda x: x['a'], inplace=True) - self.assertIs(actual, a) - self.assertEqual(expected, a) - - -if __name__ == '__main__': - absltest.main() diff --git a/ml_metrics/utils.py b/ml_metrics/utils.py index d63c58b8..97b012ab 100644 --- a/ml_metrics/utils.py +++ b/ml_metrics/utils.py @@ -15,9 +15,3 @@ # pylint: disable=g-importing-member # pylint: disable=unused-import -from ml_metrics._src.utils.df_utils import index -from ml_metrics._src.utils.df_utils import merge -from ml_metrics._src.utils.df_utils import metrics_to_df -from ml_metrics._src.utils.proto_utils import dict_to_tf_example -from ml_metrics._src.utils.proto_utils import tf_example_to_dict -from ml_metrics._src.utils.proto_utils import tf_examples_to_dict