Skip to content

Commit 2908d21

Browse files
authored
add settings validation (#797)
* add settings validation * some improvements to settings validation * improve settings validation * fix flake8 Co-authored-by: marc <Marc>
1 parent 375d1ab commit 2908d21

File tree

5 files changed

+222
-29
lines changed

5 files changed

+222
-29
lines changed

dateparser/conf.py

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import hashlib
2+
from datetime import datetime
23
from functools import wraps
34

5+
from .parser import date_order_chart
46
from .utils import registry
57

68

@@ -79,8 +81,148 @@ def wrapper(*args, **kwargs):
7981
kwargs['settings'] = settings.replace(mod_settings=mod_settings, **kwargs['settings'])
8082

8183
if not isinstance(kwargs['settings'], Settings):
82-
raise TypeError(
83-
"settings can only be either dict or instance of Settings class")
84+
raise TypeError("settings can only be either dict or instance of Settings class")
8485

8586
return f(*args, **kwargs)
8687
return wrapper
88+
89+
90+
class SettingValidationError(ValueError):
    """Raised when a provided setting has an invalid name, type or value."""
92+
93+
94+
def _check_repeated_values(setting_name, setting_value):
95+
if len(setting_value) != len(set(setting_value)):
96+
raise SettingValidationError(
97+
'There are repeated values in the "{}" setting'.format(setting_name)
98+
)
99+
return
100+
101+
102+
def _check_require_part(setting_name, setting_value):
    """Validate a list of required date parts.

    Only 'day', 'month' and 'year' are accepted, each of them at most once.
    Nothing is returned: the function either passes silently or raises.

    :param setting_name: name of the setting, only used in the error message.
    :param setting_value: iterable with the date parts to validate.
    :raises SettingValidationError: if an unsupported part is found or if a
        part is repeated.
    """
    valid_parts = ('day', 'month', 'year')
    # Membership in a tuple is decided by equality, so unhashable or
    # non-string items are reported as invalid instead of raising TypeError.
    # Sorting keeps the error message deterministic.
    invalid_values = sorted(
        {str(value) for value in setting_value if value not in valid_parts}
    )
    if invalid_values:
        raise SettingValidationError(
            '"{}" setting contains invalid values: {}'.format(
                setting_name, ', '.join(invalid_values)
            )
        )
    _check_repeated_values(setting_name, setting_value)
112+
113+
114+
def _check_parsers(setting_name, setting_value):
    """Validate a list of parser names.

    Every item must be one of the known parser names and must appear at most
    once. Nothing is returned: the function either passes silently or raises.

    :param setting_name: name of the setting, only used in the error message.
    :param setting_value: iterable with the parser names to validate.
    :raises SettingValidationError: if an unknown parser is found or if a
        parser is repeated.
    """
    existing_parsers = (
        'timestamp', 'relative-time', 'custom-formats', 'absolute-time', 'no-spaces-time'
    )  # FIXME: Extract the list of existing parsers from another place (#798)
    # Membership in a tuple is decided by equality, so unhashable or
    # non-string items are reported as unknown instead of raising TypeError.
    # Sorting keeps the error message deterministic.
    unknown_parsers = sorted(
        {str(parser) for parser in setting_value if parser not in existing_parsers}
    )
    if unknown_parsers:
        raise SettingValidationError(
            'Found unknown parsers in the "{}" setting: {}'.format(
                setting_name, ', '.join(unknown_parsers)
            )
        )
    _check_repeated_values(setting_name, setting_value)
127+
128+
129+
def check_settings(settings):
    """
    Check if provided settings are valid, if not it raises `SettingValidationError`.
    Only checks for the modified settings.

    Validation happens in two passes: first every modified setting name must
    be a known one, then each value is checked against the expected type,
    the allowed values (when the setting has a closed set of options) and
    any setting-specific extra check.

    :param settings: object exposing the modified settings as a mapping in
        its `_mod_settings` attribute.
    :raises SettingValidationError: on the first invalid name, type or value.
    """
    settings_values = {
        'DATE_ORDER': {
            'values': tuple(date_order_chart.keys()),
            'type': str,
        },
        'TIMEZONE': {
            # we don't check invalid Timezones as they raise an error
            'type': str,
        },
        'TO_TIMEZONE': {
            # It defaults to None, but it's not allowed to use it directly
            # "values" can take unlimited options
            'type': str
        },
        'RETURN_AS_TIMEZONE_AWARE': {
            # It defaults to 'default', but it's not allowed to use it directly
            'type': bool
        },
        'PREFER_DAY_OF_MONTH': {
            'values': ('current', 'first', 'last'),
            'type': str
        },
        'PREFER_DATES_FROM': {
            'values': ('current_period', 'past', 'future'),
            'type': str,
        },
        'RELATIVE_BASE': {
            # "values" can take unlimited options
            'type': datetime
        },
        'STRICT_PARSING': {
            'type': bool
        },
        'REQUIRE_PARTS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_require_part
        },
        'SKIP_TOKENS': {
            # "values" can take unlimited options
            'type': list,
        },
        'NORMALIZE': {
            'type': bool
        },
        'RETURN_TIME_AS_PERIOD': {
            'type': bool
        },
        'PARSERS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_parsers
        },
        'FUZZY': {
            'type': bool
        },
        'PREFER_LOCALE_DATE_ORDER': {
            'type': bool
        },
    }

    modified_settings = settings._mod_settings  # check only modified settings

    # check settings keys (done for all keys before any value is inspected,
    # so an unknown name is always reported first):
    for setting in modified_settings:
        if setting not in settings_values:
            raise SettingValidationError('"{}" is not a valid setting'.format(setting))

    for setting_name, setting_value in modified_settings.items():
        setting_props = settings_values[setting_name]
        expected_type = setting_props['type']

        # check type: isinstance() is used so that subclasses of the expected
        # type (e.g. a datetime subclass for RELATIVE_BASE) are accepted.
        if not isinstance(setting_value, expected_type):
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    setting_name, expected_type.__name__, type(setting_value).__name__
                )
            )

        # check values:
        allowed_values = setting_props.get('values')
        if allowed_values and setting_value not in allowed_values:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    setting_value,
                    setting_name,
                    '", "'.join(allowed_values[:-1]),
                    allowed_values[-1],
                )
            )

        # specific checks
        extra_check = setting_props.get('extra_check')
        if extra_check:
            extra_check(setting_name, setting_value)

dateparser/date.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from dateparser.date_parser import date_parser
99
from dateparser.freshness_date_parser import freshness_date_parser
1010
from dateparser.languages.loader import LocaleDataLoader
11-
from dateparser.conf import apply_settings
11+
from dateparser.conf import apply_settings, check_settings
1212
from dateparser.parser import _parse_absolute, _parse_nospaces
1313
from dateparser.timezone_parser import pop_tz_offset_from_string
1414
from dateparser.utils import apply_timezone_from_settings, \
@@ -170,13 +170,6 @@ def __init__(self, locale, date_string, date_formats, settings=None):
170170
'absolute-time': self._try_absolute_parser,
171171
'no-spaces-time': self._try_nospaces_parser,
172172
}
173-
unknown_parsers = set(self._settings.PARSERS) - set(self._parsers.keys())
174-
if unknown_parsers:
175-
raise ValueError(
176-
'Unknown parsers found in the PARSERS setting: {}'.format(
177-
', '.join(sorted(unknown_parsers))
178-
)
179-
)
180173

181174
@classmethod
182175
def parse(cls, locale, date_string, date_formats=None, settings=None):
@@ -329,6 +322,8 @@ def __init__(self, languages=None, locales=None, region=None, try_previous_local
329322
if not locales and use_given_order:
330323
raise ValueError("locales must be given if use_given_order is True")
331324

325+
check_settings(settings)
326+
332327
self._settings = settings
333328
self.try_previous_locales = try_previous_locales
334329
self.use_given_order = use_given_order

dateparser/parser.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,28 @@ def get_unresolved_attrs(parser_object):
3838
return seen, unseen
3939

4040

41+
date_order_chart = {
42+
'DMY': '%d%m%y',
43+
'DYM': '%d%y%m',
44+
'MDY': '%m%d%y',
45+
'MYD': '%m%y%d',
46+
'YDM': '%y%d%m',
47+
'YMD': '%y%m%d',
48+
}
49+
50+
4151
def resolve_date_order(order, lst=None):
42-
chart = {
43-
'MDY': '%m%d%y',
44-
'MYD': '%m%y%d',
45-
'YMD': '%y%m%d',
46-
'YDM': '%y%d%m',
47-
'DMY': '%d%m%y',
48-
'DYM': '%d%y%m',
49-
}
5052

5153
chart_list = {
54+
'DMY': ['day', 'month', 'year'],
55+
'DYM': ['day', 'year', 'month'],
5256
'MDY': ['month', 'day', 'year'],
5357
'MYD': ['month', 'year', 'day'],
54-
'YMD': ['year', 'month', 'day'],
5558
'YDM': ['year', 'day', 'month'],
56-
'DMY': ['day', 'month', 'year'],
57-
'DYM': ['day', 'year', 'month'],
59+
'YMD': ['year', 'month', 'day'],
5860
}
5961

60-
return chart_list[order] if lst else chart[order]
62+
return chart_list[order] if lst else date_order_chart[order]
6163

6264

6365
def _parse_absolute(datestring, settings):

tests/test_date.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -450,12 +450,6 @@ def test_parsing_date_using_invalid_type_date_format_must_raise_error(
450450
TypeError, ["Date formats should be list, tuple or set of strings",
451451
"'{}' object is not iterable".format(type(date_formats).__name__)])
452452

453-
def test_parsing_date_using_unknown_parsers_must_raise_error(self):
454-
self.given_parser(settings={'PARSERS': ['foo']})
455-
self.when_date_string_is_parsed('2020-02-19')
456-
self.then_error_was_raised(
457-
ValueError, ["Unknown parsers found in the PARSERS setting: foo"])
458-
459453
@parameterized.expand([
460454
param(date_string={"date": "12/11/1998"}),
461455
param(date_string=[2017, 12, 1]),

tests/test_settings.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from tests import BaseTestCase
66

7-
from dateparser.conf import settings
7+
from dateparser.conf import settings, SettingValidationError
88
from dateparser.conf import apply_settings
99

1010
from dateparser import parse, DateDataParser
@@ -152,6 +152,66 @@ def test_error_is_raised_for_invalid_type_settings(self):
152152
self.error = error
153153
self.then_error_was_raised(TypeError, ["settings can only be either dict or instance of Settings class"])
154154

155+
def test_check_settings_wrong_setting_name(self):
    """Building a parser with an unknown setting name must raise an error."""
    unknown_setting = {'AAAAA': 'foo'}
    expected_message = r'.* is not a valid setting'
    with self.assertRaisesRegex(SettingValidationError, expected_message):
        DateDataParser(settings=unknown_setting)
158+
159+
@parameterized.expand([
    param('DATE_ORDER', 2, 'YYY', 'MDY'),
    param('TIMEZONE', False, '', 'Europe/Madrid'),  # should we check valid timezones?
    param('TO_TIMEZONE', True, '', 'Europe/Madrid'),  # should we check valid timezones?
    param('RETURN_AS_TIMEZONE_AWARE', 'false', '', True),
    param('PREFER_DAY_OF_MONTH', False, 'current_period', 'current'),
    param('PREFER_DATES_FROM', True, 'current', 'current_period'),
    param('RELATIVE_BASE', 'yesterday', '', datetime.now()),
    param('SKIP_TOKENS', 'foo', '', ['foo']),
    param('REQUIRE_PARTS', 'day', '', ['month', 'day']),
    param('PARSERS', 'absolute-time', '', ['absolute-time', 'no-spaces-time']),
    param('STRICT_PARSING', 'true', '', True),
    param('RETURN_TIME_AS_PERIOD', 'false', '', True),
    param('PREFER_LOCALE_DATE_ORDER', 'true', '', False),
    param('NORMALIZE', 'true', '', True),
    param('FUZZY', 'true', '', False),
    param('PREFER_LOCALE_DATE_ORDER', 'false', '', True),
])
def test_check_settings(self, setting, wrong_type, wrong_value, valid_value):
    """Check type and value validation for one setting.

    `wrong_type` must be rejected because of its type, `wrong_value` (when
    not empty) must be rejected because it is not an allowed option, and
    `valid_value` must be accepted.
    """
    # Imported locally so this test does not rely on the module-level imports.
    import re

    # re.escape() keeps the interpolated names/values literal in the pattern.
    wrong_type_pattern = r'"{}" must be .*, not "{}".'.format(
        re.escape(setting), re.escape(type(wrong_type).__name__)
    )
    with self.assertRaisesRegex(SettingValidationError, wrong_type_pattern):
        DateDataParser(settings={setting: wrong_type})

    if wrong_value:
        wrong_value_pattern = r'"{}" is not a valid value for "{}", it should be: .*'.format(
            re.escape(str(wrong_value)), re.escape(setting)
        )
        with self.assertRaisesRegex(SettingValidationError, wrong_value_pattern):
            DateDataParser(settings={setting: wrong_value})

    # check that a valid value doesn't raise an error
    assert DateDataParser(settings={setting: valid_value})
193+
194+
def test_check_settings_extra_check_require_parts(self):
    """REQUIRE_PARTS must reject unsupported parts and repeated parts."""
    invalid_part_message = r'"REQUIRE_PARTS" setting contains invalid values: time'
    with self.assertRaisesRegex(SettingValidationError, invalid_part_message):
        DateDataParser(settings={'REQUIRE_PARTS': ['time', 'day']})

    repeated_part_message = r'There are repeated values in the "REQUIRE_PARTS" setting'
    with self.assertRaisesRegex(SettingValidationError, repeated_part_message):
        DateDataParser(settings={'REQUIRE_PARTS': ['month', 'day', 'month']})
203+
204+
def test_check_settings_extra_check_parsers(self):
    """PARSERS must reject unknown parser names and repeated parsers."""
    unknown_parser_message = r'Found unknown parsers in the "PARSERS" setting: no-spaces'
    with self.assertRaisesRegex(SettingValidationError, unknown_parser_message):
        DateDataParser(settings={'PARSERS': ['absolute-time', 'no-spaces']})

    repeated_parser_message = r'There are repeated values in the "PARSERS" setting'
    with self.assertRaisesRegex(SettingValidationError, repeated_parser_message):
        DateDataParser(settings={'PARSERS': ['absolute-time', 'timestamp', 'absolute-time']})
214+
155215

156216
@pytest.mark.parametrize(
157217
"date_string,expected_result", [

0 commit comments

Comments
 (0)