Skip to content

Commit 46ed6b1

Browse files
API/BUG: freq retention in value_counts (pandas-dev#62532)
Co-authored-by: William Ayd <[email protected]>
1 parent 6e42b1f commit 46ed6b1

File tree

3 files changed

+93
-0
lines changed

3 files changed

+93
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,7 @@ Datetimelike
999999
- Bug in comparison between objects with pyarrow date dtype and ``timestamp[pyarrow]`` or ``np.datetime64`` dtype failing to consider these as non-comparable (:issue:`62157`)
10001000
- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
10011001
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
1002+
- Bug in :meth:`Index.value_counts` not retaining the frequency of a :class:`DatetimeIndex` or :class:`TimedeltaIndex` when called with ``sort=False`` (:issue:`33830`)
10021003
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
10031004

10041005
Timedelta

pandas/core/algorithms.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -868,8 +868,10 @@ def value_counts_internal(
868868
dropna: bool = True,
869869
) -> Series:
870870
from pandas import (
871+
DatetimeIndex,
871872
Index,
872873
Series,
874+
TimedeltaIndex,
873875
)
874876

875877
index_name = getattr(values, "name", None)
@@ -934,6 +936,17 @@ def value_counts_internal(
934936
# Starting in 3.0, we no longer perform dtype inference on the
935937
# Index object we construct here, xref GH#56161
936938
idx = Index(keys, dtype=keys.dtype, name=index_name)
939+
940+
if (
941+
bins is None
942+
and not sort
943+
and isinstance(values, (DatetimeIndex, TimedeltaIndex))
944+
and idx.equals(values)
945+
and values.inferred_freq is not None
946+
):
947+
# Preserve freq of original index
948+
idx.freq = values.inferred_freq # type: ignore[attr-defined]
949+
937950
result = Series(counts, index=idx, name=name, copy=False)
938951

939952
if sort:

pandas/tests/base/test_value_counts.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Series,
1515
Timedelta,
1616
TimedeltaIndex,
17+
Timestamp,
1718
array,
1819
)
1920
import pandas._testing as tm
@@ -339,3 +340,81 @@ def test_value_counts_object_inference_deprecated():
339340
exp = dti.value_counts()
340341
exp.index = exp.index.astype(object)
341342
tm.assert_series_equal(res, exp)
343+
344+
345+
@pytest.mark.parametrize(
346+
("index", "expected_index"),
347+
[
348+
[
349+
pd.date_range("2016-01-01", periods=5, freq="D"),
350+
pd.date_range("2016-01-01", periods=5, freq="D"),
351+
],
352+
[
353+
pd.timedelta_range(Timedelta(0), periods=5, freq="h"),
354+
pd.timedelta_range(Timedelta(0), periods=5, freq="h"),
355+
],
356+
[
357+
DatetimeIndex(
358+
[Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1)]
359+
+ [Timestamp("2016-01-02")]
360+
+ [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)]
361+
),
362+
DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D")),
363+
],
364+
[
365+
TimedeltaIndex(
366+
[Timedelta(hours=i) for i in range(1)]
367+
+ [Timedelta(hours=1)]
368+
+ [Timedelta(hours=i) for i in range(1, 5)],
369+
),
370+
TimedeltaIndex(pd.timedelta_range(Timedelta(0), periods=5, freq="h")),
371+
],
372+
[
373+
DatetimeIndex(
374+
[Timestamp("2016-01-01") + Timedelta(days=i) for i in range(2)]
375+
+ [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(3, 5)],
376+
),
377+
DatetimeIndex(
378+
[Timestamp("2016-01-01") + Timedelta(days=i) for i in range(2)]
379+
+ [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(3, 5)],
380+
),
381+
],
382+
[
383+
TimedeltaIndex(
384+
[Timedelta(hours=i) for i in range(2)]
385+
+ [Timedelta(hours=i) for i in range(3, 5)],
386+
),
387+
TimedeltaIndex(
388+
[Timedelta(hours=i) for i in range(2)]
389+
+ [Timedelta(hours=i) for i in range(3, 5)],
390+
),
391+
],
392+
[
393+
DatetimeIndex(
394+
[Timestamp("2016-01-01")]
395+
+ [pd.NaT]
396+
+ [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)],
397+
),
398+
DatetimeIndex(
399+
[Timestamp("2016-01-01")]
400+
+ [pd.NaT]
401+
+ [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)],
402+
),
403+
],
404+
[
405+
TimedeltaIndex(
406+
[Timedelta(hours=0)]
407+
+ [pd.NaT]
408+
+ [Timedelta(hours=i) for i in range(1, 5)],
409+
),
410+
TimedeltaIndex(
411+
[Timedelta(hours=0)]
412+
+ [pd.NaT]
413+
+ [Timedelta(hours=i) for i in range(1, 5)],
414+
),
415+
],
416+
],
417+
)
418+
def test_value_counts_index_datetimelike(index, expected_index):
419+
vc = index.value_counts(sort=False, dropna=False)
420+
tm.assert_index_equal(vc.index, expected_index)

0 commit comments

Comments
 (0)