Skip to content

Commit 45bd628

Browse files
committed
[dateparser] Fix parsing very short weekday names
- Add a `remove_multiple_occurrences` method that removes repeated day-of-the-week tokens from the token list, keeping only the first (#1170)
1 parent 1bea64d commit 45bd628

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

dateparser/languages/locale.py

+26
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,31 @@ def clean_dictionary(dictionary, threshold=2):
109109
del dictionary[del_key]
110110
return dictionary
111111

112+
@property
def weekdays(self):
    """Return the lowercase English weekday names, Monday through Sunday.

    A new list is built on every access, so callers may mutate the
    result without affecting later lookups.
    """
    return [
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]
124+
125+
def remove_multiple_occurrences(self, date_str_tokens: list) -> None:
    """Remove every weekday token after the first one, in place.

    Only the first token that matches an entry of ``self.weekdays`` is
    kept; any later token that matches *any* weekday name (not just the
    same day) is dropped. All other tokens keep their relative order.

    The list is modified in place and nothing is returned, so callers
    that hold a reference to ``date_str_tokens`` see the cleaned tokens.

    :param date_str_tokens: Tokens of the date string being translated.
    """
    # Look the weekday names up once instead of on every token.
    weekday_names = self.weekdays
    weekday_seen = False
    kept_tokens = []

    # Collect the survivors in a separate list: popping from the list
    # while iterating it shifts the remaining items and makes the loop
    # skip the token right after each removal.
    for token in date_str_tokens:
        if token in weekday_names:
            if weekday_seen:
                continue
            weekday_seen = True
        kept_tokens.append(token)

    # Slice assignment keeps the caller's list object and replaces its content.
    date_str_tokens[:] = kept_tokens
136+
112137
def translate(self, date_string, keep_formatting=False, settings=None):
113138
"""
114139
Translate the date string to its English equivalent.
@@ -145,6 +170,7 @@ def translate(self, date_string, keep_formatting=False, settings=None):
145170
if "in" in date_string_tokens:
146171
date_string_tokens = self._clear_future_words(date_string_tokens)
147172

173+
self.remove_multiple_occurrences(date_string_tokens)
148174
return self._join(
149175
list(filter(bool, date_string_tokens)),
150176
separator="" if keep_formatting else " ",

0 commit comments

Comments
 (0)