Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit bdcfe14

Browse files
authored
Implement Series.str.ljust()/rjust() (#392)
* Implement Series.str.ljust()/rjust() * Add examples and documentation for str.ljust/rjust
1 parent 81208c7 commit bdcfe14

File tree

5 files changed

+292
-2
lines changed

5 files changed

+292
-2
lines changed

examples/series_str_ljust.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2019, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
import pandas as pd
28+
from numba import njit
29+
30+
31+
@njit
def series_str_ljust():
    # Input strings are all three characters long: 'dog', 'foo', 'bar'
    words = pd.Series(['dog', 'foo', 'bar'])

    # Pad every string on the right with '*' up to a total width of 5
    return words.str.ljust(5, '*')  # Expect series of 'dog**', 'foo**', 'bar**'


print(series_str_ljust())

examples/series_str_rjust.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2019, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
import pandas as pd
28+
from numba import njit
29+
30+
31+
@njit
def series_str_rjust():
    # Input strings are all three characters long: 'dog', 'foo', 'bar'
    words = pd.Series(['dog', 'foo', 'bar'])

    # Pad every string on the left with '*' up to a total width of 5
    return words.str.rjust(5, '*')  # Expect series of '**dog', '**foo', '**bar'


print(series_str_rjust())

sdc/datatypes/hpat_pandas_stringmethods_functions.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,150 @@ def hpat_pandas_stringmethods_len_impl(self):
418418
return hpat_pandas_stringmethods_len_impl
419419

420420

421+
@overload_method(StringMethodsType, 'ljust')
def hpat_pandas_stringmethods_ljust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.ljust

    Examples
    --------
    .. literalinclude:: ../../../examples/series_str_ljust.py
       :language: python
       :lines: 27-
       :caption: Filling right side of strings in the Series with an additional character
       :name: ex_series_str_ljust

    .. code-block:: console

        > python ./series_str_ljust.py
        0    dog**
        1    foo**
        2    bar**
        dtype: object

    .. todo:: Add support of 32-bit Unicode for `str.ljust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.ljust()` implementation.

    Note: Only elements of Unicode string type are supported. numpy.nan elements are not supported.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_just

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string
    fillchar: :obj:`str`
        Additional character for filling, default is whitespace

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    # Typing stage: validate argument types before handing out an implementation
    ty_checker = TypeChecker('Method ljust().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    # fillchar is accepted when it is omitted, a string literal or a unicode type,
    # or when it compares equal to the plain default value ' '
    accepted_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(fillchar, accepted_types) and fillchar != ' ':
        ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_ljust_impl(self, width, fillchar=' '):
        # NOTE(review): self._data is presumably the Series wrapped by the accessor,
        # and self._data._data its underlying string values - confirm against StringMethodsType
        item_count = len(self._data)
        padded = [''] * item_count
        for idx, item in enumerate(self._data._data):
            padded[idx] = item.ljust(width, fillchar)

        # Keep the index and the name of the source data in the result
        return pandas.Series(padded, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_ljust_impl
491+
492+
493+
@overload_method(StringMethodsType, 'rjust')
def hpat_pandas_stringmethods_rjust(self, width, fillchar=' '):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.Series.str.rjust

    Examples
    --------
    .. literalinclude:: ../../../examples/series_str_rjust.py
       :language: python
       :lines: 27-
       :caption: Filling left side of strings in the Series with an additional character
       :name: ex_series_str_rjust

    .. code-block:: console

        > python ./series_str_rjust.py
        0    **dog
        1    **foo
        2    **bar
        dtype: object

    .. todo:: Add support of 32-bit Unicode for `str.rjust()`

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************

    Pandas Series method :meth:`pandas.core.strings.StringMethods.rjust()` implementation.

    Note: Only elements of Unicode string type are supported. numpy.nan elements are not supported.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_str_just

    Parameters
    ----------
    self: :class:`pandas.core.strings.StringMethods`
        input arg
    width: :obj:`int`
        Minimum width of resulting string
    fillchar: :obj:`str`
        Additional character for filling, default is whitespace

    Returns
    -------
    :obj:`pandas.Series`
         returns :obj:`pandas.Series` object
    """

    # Typing stage: validate argument types before handing out an implementation
    ty_checker = TypeChecker('Method rjust().')
    ty_checker.check(self, StringMethodsType)

    if not isinstance(width, Integer):
        ty_checker.raise_exc(width, 'int', 'width')

    # fillchar is accepted when it is omitted, a string literal or a unicode type,
    # or when it compares equal to the plain default value ' '
    accepted_types = (Omitted, StringLiteral, UnicodeType)
    if not isinstance(fillchar, accepted_types) and fillchar != ' ':
        ty_checker.raise_exc(fillchar, 'str', 'fillchar')

    def hpat_pandas_stringmethods_rjust_impl(self, width, fillchar=' '):
        # NOTE(review): self._data is presumably the Series wrapped by the accessor,
        # and self._data._data its underlying string values - confirm against StringMethodsType
        item_count = len(self._data)
        padded = [''] * item_count
        for idx, item in enumerate(self._data._data):
            padded[idx] = item.rjust(width, fillchar)

        # Keep the index and the name of the source data in the result
        return pandas.Series(padded, self._data._index, name=self._data._name)

    return hpat_pandas_stringmethods_rjust_impl
563+
564+
421565
@overload_method(StringMethodsType, 'startswith')
422566
def hpat_pandas_stringmethods_startswith(self, pat, na=None):
423567
"""

sdc/hiframes/pd_series_ext.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,8 +759,8 @@ def resolve_head(self, ary, args, kws):
759759
"""
760760

761761
str2str_methods_excluded = [
762-
'upper', 'center', 'endswith', 'find', 'isupper', 'len',
763-
'lower', 'lstrip', 'rstrip', 'startswith', 'strip'
762+
'upper', 'center', 'endswith', 'find', 'isupper', 'len', 'ljust',
763+
'lower', 'lstrip', 'rjust', 'rstrip', 'startswith', 'strip'
764764
]
765765
"""
766766
Functions which are used from Numba directly by calling from StringMethodsType

sdc/tests/test_series.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,22 @@ def _make_func_use_method_arg1(method):
208208
return _make_func_from_text(func_text)
209209

210210

211+
def ljust_usecase(series, width):
    """Left-justify every string in ``series`` to ``width`` with the default fill character."""
    justified = series.str.ljust(width)
    return justified
213+
214+
215+
def ljust_with_fillchar_usecase(series, width, fillchar):
    """Left-justify every string in ``series`` to ``width``, padding with ``fillchar``."""
    justified = series.str.ljust(width, fillchar)
    return justified
217+
218+
219+
def rjust_usecase(series, width):
    """Right-justify every string in ``series`` to ``width`` with the default fill character."""
    justified = series.str.rjust(width)
    return justified
221+
222+
223+
def rjust_with_fillchar_usecase(series, width, fillchar):
    """Right-justify every string in ``series`` to ``width``, padding with ``fillchar``."""
    justified = series.str.rjust(width, fillchar)
    return justified
225+
226+
211227
GLOBAL_VAL = 2
212228

213229

@@ -2589,6 +2605,58 @@ def test_impl(S):
25892605
S = pd.Series(data, index, name=name)
25902606
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))
25912607

2608+
def test_series_str_just_default_fillchar(self):
    """Compare jitted ljust()/rjust() with the pure-Python result when fillchar is omitted."""
    data = test_global_input_data_unicode_kind1
    series = pd.Series(data)
    # Wider than the longest string, so every element gets padded
    width = max(map(len, data)) + 5

    for pyfunc in (ljust_usecase, rjust_usecase):
        cfunc = self.jit(pyfunc)
        jit_result = cfunc(series, width)
        ref_result = pyfunc(series, width)
        pd.testing.assert_series_equal(jit_result, ref_result)
2618+
2619+
def test_series_str_just(self):
    """Compare jitted ljust()/rjust() with the pure-Python result for an explicit fillchar.

    Covers a default index, a reversed integer index and a string index, each
    with a width that pads every element and a width that pads none.
    """
    data = test_global_input_data_unicode_kind1
    data_lengths = [len(s) for s in data]
    # max + 5 forces padding of every string; min is never larger than any
    # string, so nothing is padded
    widths = [max(data_lengths) + 5, min(data_lengths)]
    indexes = [None, list(range(len(data)))[::-1], data[::-1]]

    for pyfunc in (ljust_with_fillchar_usecase, rjust_with_fillchar_usecase):
        # Build the jitted wrapper once per use case instead of once per
        # index/width combination
        cfunc = self.jit(pyfunc)
        for index in indexes:
            series = pd.Series(data, index, name='A')
            for width, fillchar in product(widths, ['\t']):
                # subTest reports exactly which combination failed
                with self.subTest(func=pyfunc.__name__, index=index,
                                  width=width, fillchar=fillchar):
                    jit_result = cfunc(series, width, fillchar)
                    ref_result = pyfunc(series, width, fillchar)
                    pd.testing.assert_series_equal(jit_result, ref_result)
2633+
2634+
def test_series_str_just_exception_unsupported_fillchar(self):
    """Passing an integer as fillchar must raise TypingError naming the offending argument."""
    data = test_global_input_data_unicode_kind1
    series = pd.Series(data)
    width = max(map(len, data)) + 5
    msg_tmpl = 'Method {}(). The object fillchar\n given: int64\n expected: str'

    usecases = {
        'ljust': ljust_with_fillchar_usecase,
        'rjust': rjust_with_fillchar_usecase,
    }
    for name, pyfunc in usecases.items():
        cfunc = self.jit(pyfunc)
        with self.assertRaises(TypingError) as raises:
            cfunc(series, width, 5)
        expected_msg = msg_tmpl.format(name)
        self.assertIn(expected_msg, str(raises.exception))
2647+
2648+
def test_series_str_just_exception_unsupported_kind4(self):
    """Pin the current failure mode of ljust()/rjust() on the kind4 unicode fixture.

    NOTE(review): judging by the fixture name, the data presumably holds 4-byte
    unicode strings, which the implementation does not support yet - confirm.
    """
    data = test_global_input_data_unicode_kind4
    series = pd.Series(data)
    width = max(map(len, data)) + 5
    msg = 'NULL object passed to Py_BuildValue'

    for pyfunc in (ljust_usecase, rjust_usecase):
        cfunc = self.jit(pyfunc)
        with self.assertRaises(SystemError) as raises:
            cfunc(series, width)
        error_text = str(raises.exception)
        self.assertIn(msg, error_text)
2659+
25922660
def test_series_str_startswith(self):
25932661
def test_impl(series, pat):
25942662
return series.str.startswith(pat)

0 commit comments

Comments
 (0)