Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 34f2144

Browse files
authored
Implement Series.str.zfill() (#397)
* Implement Series.str.zfill() * Minor change in tests for Series.str.zfill()
1 parent bdcfe14 commit 34f2144

File tree

4 files changed

+137
-1
lines changed

4 files changed

+137
-1
lines changed

examples/series_str_zfill.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2019, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
import pandas as pd
28+
from numba import njit
29+
30+
31+
@njit
def series_str_zfill():
    # Build the input inside the jitted function so the whole example is compiled
    words = pd.Series(['dog', 'foo', 'bar'])  # Series of 'dog', 'foo', 'bar'

    # Left-pad every element with '0' characters up to a total width of 5
    padded = words.str.zfill(5)

    return padded  # Expect series of '00dog', '00foo', '00bar'


print(series_str_zfill())

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,72 @@ def hpat_pandas_stringmethods_startswith_impl(self, pat, na=None):
613613
return hpat_pandas_stringmethods_startswith_impl
614614

615615

616+
@overload_method(StringMethodsType, 'zfill')
def hpat_pandas_stringmethods_zfill(self, width):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.zfill

    Examples
    --------
    .. literalinclude:: ../../../examples/series_str_zfill.py
       :language: python
       :lines: 27-
       :caption: Pad strings in the Series by prepending '0' characters
       :name: ex_series_str_zfill

    .. code-block:: console

        > python ./series_str_zfill.py
        0    00dog
        1    00foo
        2    00bar
        dtype: object

    .. todo:: Add support of 32-bit Unicode for `str.zfill()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.zfill()` implementation.

    Note: Only Unicode string elements are supported. Numpy.NaN is not supported as an element.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_zfill

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    # Typing-time validation: reject unsupported argument types before an
    # implementation is handed back to the compiler.
    ty_checker = TypeChecker('Method zfill().')
    ty_checker.check(self, StringMethodsType)

    # `width` is a type object here, so it is checked against the Integer type class
    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    def hpat_pandas_stringmethods_zfill_impl(self, width):
        # NOTE(review): self._data appears to be the wrapped Series (it exposes
        # _data, _index and _name below) - confirm against StringMethodsType.
        item_count = len(self._data)
        # Pre-sized list of empty strings that is filled in by position
        result = [''] * item_count
        for idx, item in enumerate(self._data._data):
            result[idx] = item.zfill(width)

        # The result keeps the index and the name of the source Series
        return pandas.Series(result, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_zfill_impl
680+
681+
616682
def _hpat_pandas_stringmethods_autogen(method_name):
617683
""""
618684
The function generates a function for 'method_name' from source text that is created on the fly.

sdc/hiframes/pd_series_ext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,7 @@ def resolve_head(self, ary, args, kws):
760760

761761
str2str_methods_excluded = [
762762
'upper', 'center', 'endswith', 'find', 'isupper', 'len', 'ljust',
763-
'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip'
763+
'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip', 'zfill'
764764
]
765765
"""
766766
Functions which are used from Numba directly by calling from StringMethodsType

sdc/tests/test_series.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2692,6 +2692,37 @@ def test_impl(series, pat, na):
26922692
msg = msg_tmpl.format('expected: None')
26932693
self.assertIn(msg, str(raises.exception))
26942694

2695+
def test_series_str_zfill(self):
    """Compare jitted Series.str.zfill() with pandas for several indexes and widths."""
    def test_impl(series, width):
        return series.str.zfill(width)

    hpat_func = self.jit(test_impl)

    data = test_global_input_data_unicode_kind1
    lengths = list(map(len, data))

    # Default index, reversed integer index and a string index
    indexes = [None, list(range(len(data)))[::-1], data[::-1]]
    # One width that pads every element and one that pads none of them
    widths = [max(lengths) + 5, min(lengths)]

    for index in indexes:
        series = pd.Series(data, index, name='A')
        for width in widths:
            actual = hpat_func(series, width)
            expected = test_impl(series, width)
            pd.testing.assert_series_equal(actual, expected)
2711+
def test_series_str_zfill_exception_unsupported_kind4(self):
    """Check that jitted Series.str.zfill() fails with SystemError on the kind4 Unicode data."""
    def test_impl(series, width):
        return series.str.zfill(width)

    hpat_func = self.jit(test_impl)

    data = test_global_input_data_unicode_kind4
    series = pd.Series(data)
    # Wide enough that every element would have to be padded
    width = max(map(len, data)) + 5

    expected_msg = 'NULL object passed to Py_BuildValue'
    with self.assertRaises(SystemError) as raises:
        hpat_func(series, width)
    self.assertIn(expected_msg, str(raises.exception))
2725+
26952726
def test_series_str2str(self):
26962727
common_methods = ['lower', 'upper', 'isupper']
26972728
sdc_methods = ['capitalize', 'swapcase', 'title',

0 commit comments

Comments
 (0)