diff --git a/.travis.yml b/.travis.yml
index e963c4947..adb68ba8e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,7 +13,7 @@ matrix:
- python: 3.7
env: TOXENV=py37
- python: 3.8
- env: TOXENV=py38
+ env: TOXENV=py38 PYPI_RELEASE_JOB=true
- python: 3.9-dev
env: TOXENV=py39
@@ -41,4 +41,4 @@ deploy:
on:
tags: true
repo: scrapinghub/dateparser
- condition: "$TOXENV == py27"
+ condition: "$PYPI_RELEASE_JOB == true"
diff --git a/README.rst b/README.rst
index 3bf7caa6d..20fd15545 100644
--- a/README.rst
+++ b/README.rst
@@ -1,486 +1,209 @@
-====================================================
-dateparser -- python parser for human readable dates
-====================================================
+.. Note that we use raw HTML in the header section because centering images and paragraphs is not supported in GitHub (https://github.com/github/markup/issues/163)
+
+.. raw:: html
+
+
+
+
+
+
+
+
+
+ Python parser for human readable dates
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Key Features •
+ How To Use •
+ Installation •
+ Common use cases •
+ You may also like... •
+ License
+
+
+
+Key Features
+------------
+
+- Support for almost every existing date format: absolute dates,
+ relative dates (``"two weeks ago"`` or ``"tomorrow"``), timestamps,
+ etc.
+- Support for more than `200 language
+ locales `__.
+- Language autodetection
+- Customizable behavior through
+ `settings `__.
+- Support for `non-Gregorian calendar
+ systems `__.
+- Support for dates with timezone abbreviations or UTC offsets
+ (``"August 14, 2015 EST"``, ``"21 July 2013 10:15 pm +0500"``...)
+- `Search
+ dates `__
+ in longer texts.
+
+How To Use
+----------
+
+The most straightforward way to parse dates with **dateparser** is to
+use the ``dateparser.parse()`` function, which wraps around most of the
+functionality of the module.
+
+.. code:: python
-.. image:: https://img.shields.io/travis/scrapinghub/dateparser/master.svg?style=flat-square
- :target: https://travis-ci.org/scrapinghub/dateparser
- :alt: travis build status
-
-.. image:: https://img.shields.io/pypi/v/dateparser.svg?style=flat-square
- :target: https://pypi.python.org/pypi/dateparser
- :alt: pypi version
-
-.. image:: https://readthedocs.org/projects/dateparser/badge/?version=latest
- :target: http://dateparser.readthedocs.org/en/latest/?badge=latest
- :alt: Documentation Status
-
-.. image:: https://codecov.io/gh/scrapinghub/dateparser/branch/master/graph/badge.svg
- :target: https://codecov.io/gh/scrapinghub/dateparser
- :alt: Code Coverage
-
-.. image:: https://badges.gitter.im/scrapinghub/dateparser.svg
- :alt: Join the chat at https://gitter.im/scrapinghub/dateparser
- :target: https://gitter.im/scrapinghub/dateparser?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
-
-
-`dateparser` provides modules to easily parse localized dates in almost
-any string formats commonly found on web pages.
-
-.. contents::
-
-Documentation
-=============
-
-Documentation is built automatically and can be found on
-`Read the Docs `_.
-
-
-Features
-========
+ >>> import dateparser
-* Generic parsing of dates in over 200 language locales plus numerous formats in a language agnostic fashion.
-* Generic parsing of relative dates like: ``'1 min ago'``, ``'2 weeks ago'``, ``'3 months, 1 week and 1 day ago'``, ``'in 2 days'``, ``'tomorrow'``.
-* Generic parsing of dates with time zones abbreviations or UTC offsets like: ``'August 14, 2015 EST'``, ``'July 4, 2013 PST'``, ``'21 July 2013 10:15 pm +0500'``.
-* Date lookup in longer texts.
-* Support for non-Gregorian calendar systems. See `Supported Calendars`_.
-* Extensive test coverage.
+ >>> dateparser.parse('Fri, 12 Dec 2014 10:55:50')
+ datetime.datetime(2014, 12, 12, 10, 55, 50)
+ >>> dateparser.parse('1991-05-17')
+ datetime.datetime(1991, 5, 17, 0, 0)
-Usage
-=====
+ >>> dateparser.parse('In two months') # today is 1st Aug 2020
+ datetime.datetime(2020, 10, 1, 11, 12, 27, 764201)
-The most straightforward way is to use the `dateparser.parse <#dateparser.parse>`_ function,
-that wraps around most of the functionality in the module.
+ >>> dateparser.parse('1484823450') # timestamp
+ datetime.datetime(2017, 1, 19, 10, 57, 30)
-.. automodule:: dateparser
- :members: parse
+ >>> dateparser.parse('January 12, 2012 10:00 PM EST')
+ datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+As you can see, **dateparser** works with different date formats, but it
+can also be used directly with strings in different languages:
-Popular Formats
----------------
+.. code:: python
- >>> import dateparser
- >>> dateparser.parse('12/12/12')
- datetime.datetime(2012, 12, 12, 0, 0)
- >>> dateparser.parse('Fri, 12 Dec 2014 10:55:50')
- datetime.datetime(2014, 12, 12, 10, 55, 50)
>>> dateparser.parse('Martes 21 de Octubre de 2014') # Spanish (Tuesday 21 October 2014)
datetime.datetime(2014, 10, 21, 0, 0)
+
>>> dateparser.parse('Le 11 Décembre 2014 à 09:00') # French (11 December 2014 at 09:00)
datetime.datetime(2014, 12, 11, 9, 0)
+
>>> dateparser.parse('13 января 2015 г. в 13:34') # Russian (13 January 2015 at 13:34)
datetime.datetime(2015, 1, 13, 13, 34)
+
>>> dateparser.parse('1 เดือนตุลาคม 2005, 1:00 AM') # Thai (1 October 2005, 1:00 AM)
datetime.datetime(2005, 10, 1, 1, 0)
-This will try to parse a date from the given string, attempting to
-detect the language each time.
-
-You can specify the language(s), if known, using ``languages`` argument. In this case, given languages are used and language detection is skipped:
-
- >>> dateparser.parse('2015, Ago 15, 1:08 pm', languages=['pt', 'es'])
- datetime.datetime(2015, 8, 15, 13, 8)
-
-If you know the possible formats of the dates, you can
-use the ``date_formats`` argument:
-
- >>> dateparser.parse('22 Décembre 2010', date_formats=['%d %B %Y'])
- datetime.datetime(2010, 12, 22, 0, 0)
-
-
-Relative Dates
---------------
-
- >>> parse('1 hour ago')
- datetime.datetime(2015, 5, 31, 23, 0)
- >>> parse('Il ya 2 heures') # French (2 hours ago)
- datetime.datetime(2015, 5, 31, 22, 0)
- >>> parse('1 anno 2 mesi') # Italian (1 year 2 months)
- datetime.datetime(2014, 4, 1, 0, 0)
- >>> parse('yaklaşık 23 saat önce') # Turkish (23 hours ago)
- datetime.datetime(2015, 5, 31, 1, 0)
- >>> parse('Hace una semana') # Spanish (a week ago)
- datetime.datetime(2015, 5, 25, 0, 0)
- >>> parse('2小时前') # Chinese (2 hours ago)
- datetime.datetime(2015, 5, 31, 22, 0)
-
-.. note:: Testing above code might return different values for you depending on your environment's current date and time.
-
-.. note:: Support for relative dates in future needs a lot of improvement, we look forward to community's contribution to get better on that part. See `Contributing`_.
-
-
-OOTB Language Based Date Order Preference
------------------------------------------
-
- >>> # parsing ambiguous date
- >>> parse('02-03-2016') # assumes english language, uses MDY date order
- datetime.datetime(2016, 2, 3, 0, 0)
- >>> parse('le 02-03-2016') # detects french, uses DMY date order
- datetime.datetime(2016, 3, 2, 0, 0)
-
-.. note:: Ordering is not locale based, that's why do not expect `DMY` order for UK/Australia English. You can specify date order in that case as follows usings `Settings`_:
-
- >>> parse('18-12-15 06:00', settings={'DATE_ORDER': 'DMY'})
- datetime.datetime(2015, 12, 18, 6, 0)
-
-For more on date order, please look at `Settings`_.
-
-
-Timezone and UTC Offset
------------------------
-
-By default, `dateparser` returns tzaware `datetime` if timezone is present in date string. Otherwise, it returns a naive `datetime` object.
-
- >>> parse('January 12, 2012 10:00 PM EST')
- datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+ >>> dateparser.parse('yaklaşık 23 saat önce') # Turkish (23 hours ago), current time: 12:46
+ datetime.datetime(2019, 9, 7, 13, 46)
- >>> parse('January 12, 2012 10:00 PM -0500')
- datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+ >>> dateparser.parse('2小时前') # Chinese (2 hours ago), current time: 22:30
+ datetime.datetime(2018, 5, 31, 20, 30)
- >>> parse('2 hours ago EST')
- datetime.datetime(2017, 3, 10, 15, 55, 39, 579667, tzinfo=)
+You can control multiple behaviors by using the ``settings`` parameter:
- >>> parse('2 hours ago -0500')
- datetime.datetime(2017, 3, 10, 15, 59, 30, 193431, tzinfo=)
+.. code:: python
- If date has no timezone name/abbreviation or offset, you can specify it using `TIMEZONE` setting.
+ >>> dateparser.parse('2014-10-12', settings={'DATE_ORDER': 'YMD'})
+ datetime.datetime(2014, 10, 12, 0, 0)
- >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern'})
- datetime.datetime(2012, 1, 12, 22, 0)
+ >>> dateparser.parse('2014-10-12', settings={'DATE_ORDER': 'YDM'})
+ datetime.datetime(2014, 12, 10, 0, 0)
- >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': '+0500'})
- datetime.datetime(2012, 1, 12, 22, 0)
+ >>> dateparser.parse('1 year', settings={'PREFER_DATES_FROM': 'future'}) # Today is 2020-09-23
+ datetime.datetime(2021, 9, 23, 0, 0)
-`TIMEZONE` option may not be useful alone as it only attaches given timezone to
-resultant `datetime` object. But can be useful in cases where you want conversions from and to different
-timezones or when simply want a tzaware date with given timezone info attached.
+ >>> dateparser.parse('tomorrow', settings={'RELATIVE_BASE': datetime.datetime(1992, 1, 1)})
+ datetime.datetime(1992, 1, 2, 0, 0)
- >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern', 'RETURN_AS_TIMEZONE_AWARE': True})
- datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+To see more examples on how to use the ``settings``, check the `settings
+section `__
+in the docs.
+False positives
+^^^^^^^^^^^^^^^
- >>> parse('10:00 am', settings={'TIMEZONE': 'EST', 'TO_TIMEZONE': 'EDT'})
- datetime.datetime(2016, 9, 25, 11, 0)
+.. warning::
+ **dateparser** will do its best to return a date, dealing with multiple formats and different
+ locales. For that reason it is important that the input is a valid date, otherwise it could
+ return false positives.
-Some more use cases for conversion of timezones.
- >>> parse('10:00 am EST', settings={'TO_TIMEZONE': 'EDT'}) # date string has timezone info
- datetime.datetime(2017, 3, 12, 11, 0, tzinfo=)
+To reduce the possibility of receiving false positives, make sure that:
- >>> parse('now EST', settings={'TO_TIMEZONE': 'UTC'}) # relative dates
- datetime.datetime(2017, 3, 10, 23, 24, 47, 371823, tzinfo=)
+- The input string is a valid date and doesn't contain any other words or numbers.
+- If you know the language or languages beforehand, add them through the ``languages`` or ``locales`` settings.
-In case, no timezone is present in date string or defined in `settings`. You can still
-return tzaware `datetime`. It is especially useful in case of relative dates when uncertain
-what timezone is relative base.
- >>> parse('2 minutes ago', settings={'RETURN_AS_TIMEZONE_AWARE': True})
- datetime.datetime(2017, 3, 11, 4, 25, 24, 152670, tzinfo=)
+On the other hand, if you want to exclude any of the default parsers
+(``timestamp``, ``relative-time``...) or change the order in which they
+are executed, you can do so through the
+`settings PARSERS `_.
-In case, you want to compute relative dates in UTC instead of default system's local timezone, you can use `TIMEZONE` setting.
+Installation
+------------
- >>> parse('4 minutes ago', settings={'TIMEZONE': 'UTC'})
- datetime.datetime(2017, 3, 10, 23, 27, 59, 647248, tzinfo=)
+Dateparser supports Python >= 3.5. You can install it by doing:
-.. note:: In case, when timezone is present both in string and also specified using `settings`, string is parsed into tzaware representation and then converted to timezone specified in `settings`.
+::
- >>> parse('10:40 pm PKT', settings={'TIMEZONE': 'UTC'})
- datetime.datetime(2017, 3, 12, 17, 40, tzinfo=)
+ $ pip install dateparser
- >>> parse('20 mins ago EST', settings={'TIMEZONE': 'UTC'})
- datetime.datetime(2017, 3, 12, 21, 16, 0, 885091, tzinfo=)
+If you want to use the jalali or hijri calendar, you need to install the
+``calendars`` extra:
-For more on timezones, please look at `Settings`_.
+::
+ $ pip install dateparser[calendars]
-Incomplete Dates
+Common use cases
----------------
- >>> from dateparser import parse
- >>> parse('December 2015') # default behavior
- datetime.datetime(2015, 12, 16, 0, 0)
- >>> parse('December 2015', settings={'PREFER_DAY_OF_MONTH': 'last'})
- datetime.datetime(2015, 12, 31, 0, 0)
- >>> parse('December 2015', settings={'PREFER_DAY_OF_MONTH': 'first'})
- datetime.datetime(2015, 12, 1, 0, 0)
-
- >>> parse('March')
- datetime.datetime(2015, 3, 16, 0, 0)
- >>> parse('March', settings={'PREFER_DATES_FROM': 'future'})
- datetime.datetime(2016, 3, 16, 0, 0)
- >>> # parsing with preference set for 'past'
- >>> parse('August', settings={'PREFER_DATES_FROM': 'past'})
- datetime.datetime(2015, 8, 15, 0, 0)
-
-You can also ignore parsing incomplete dates altogether by setting `STRICT_PARSING` flag as follows:
-
- >>> parse('December 2015', settings={'STRICT_PARSING': True})
- None
-
-For more on handling incomplete dates, please look at `Settings`_.
-
-
-Search for Dates in Longer Chunks of Text
------------------------------------------
-
-You can extract dates from longer strings of text. They are returned as list of tuples with text chunk containing the date and parsed datetime object.
-
-.. automodule:: dateparser.search
- :members: search_dates
-
-Dependencies
-============
-
-`dateparser` relies on following libraries in some ways:
-
- * dateutil_'s module ``relativedelta`` for its freshness parser.
- * convertdate_ to convert *Jalali* dates to *Gregorian*.
- * hijri-converter_ to convert *Hijri* dates to *Gregorian*.
- * tzlocal_ to reliably get local timezone.
- * ruamel.yaml_ (optional) for operations on language files.
-
-.. _dateutil: https://pypi.python.org/pypi/python-dateutil
-.. _convertdate: https://pypi.python.org/pypi/convertdate
-.. _hijri-converter: https://pypi.python.org/pypi/hijri-converter
-.. _tzlocal: https://pypi.python.org/pypi/tzlocal
-.. _ruamel.yaml: https://pypi.python.org/pypi/ruamel.yaml
-
-Supported languages and locales
-===============================
-
-============ ================================================================
- Language Locales
-============ ================================================================
-en 'en-001', 'en-150', 'en-AG', 'en-AI', 'en-AS', 'en-AT', 'en-AU', 'en-BB', 'en-BE', 'en-BI', 'en-BM', 'en-BS', 'en-BW', 'en-BZ', 'en-CA', 'en-CC', 'en-CH', 'en-CK', 'en-CM', 'en-CX', 'en-CY', 'en-DE', 'en-DG', 'en-DK', 'en-DM', 'en-ER', 'en-FI', 'en-FJ', 'en-FK', 'en-FM', 'en-GB', 'en-GD', 'en-GG', 'en-GH', 'en-GI', 'en-GM', 'en-GU', 'en-GY', 'en-HK', 'en-IE', 'en-IL', 'en-IM', 'en-IN', 'en-IO', 'en-JE', 'en-JM', 'en-KE', 'en-KI', 'en-KN', 'en-KY', 'en-LC', 'en-LR', 'en-LS', 'en-MG', 'en-MH', 'en-MO', 'en-MP', 'en-MS', 'en-MT', 'en-MU', 'en-MW', 'en-MY', 'en-NA', 'en-NF', 'en-NG', 'en-NL', 'en-NR', 'en-NU', 'en-NZ', 'en-PG', 'en-PH', 'en-PK', 'en-PN', 'en-PR', 'en-PW', 'en-RW', 'en-SB', 'en-SC', 'en-SD', 'en-SE', 'en-SG', 'en-SH', 'en-SI', 'en-SL', 'en-SS', 'en-SX', 'en-SZ', 'en-TC', 'en-TK', 'en-TO', 'en-TT', 'en-TV', 'en-TZ', 'en-UG', 'en-UM', 'en-VC', 'en-VG', 'en-VI', 'en-VU', 'en-WS', 'en-ZA', 'en-ZM', 'en-ZW'
-zh
-zh-Hans 'zh-Hans-HK', 'zh-Hans-MO', 'zh-Hans-SG'
-hi
-es 'es-419', 'es-AR', 'es-BO', 'es-BR', 'es-BZ', 'es-CL', 'es-CO', 'es-CR', 'es-CU', 'es-DO', 'es-EA', 'es-EC', 'es-GQ', 'es-GT', 'es-HN', 'es-IC', 'es-MX', 'es-NI', 'es-PA', 'es-PE', 'es-PH', 'es-PR', 'es-PY', 'es-SV', 'es-US', 'es-UY', 'es-VE'
-ar 'ar-AE', 'ar-BH', 'ar-DJ', 'ar-DZ', 'ar-EG', 'ar-EH', 'ar-ER', 'ar-IL', 'ar-IQ', 'ar-JO', 'ar-KM', 'ar-KW', 'ar-LB', 'ar-LY', 'ar-MA', 'ar-MR', 'ar-OM', 'ar-PS', 'ar-QA', 'ar-SA', 'ar-SD', 'ar-SO', 'ar-SS', 'ar-SY', 'ar-TD', 'ar-TN', 'ar-YE'
-bn 'bn-IN'
-fr 'fr-BE', 'fr-BF', 'fr-BI', 'fr-BJ', 'fr-BL', 'fr-CA', 'fr-CD', 'fr-CF', 'fr-CG', 'fr-CH', 'fr-CI', 'fr-CM', 'fr-DJ', 'fr-DZ', 'fr-GA', 'fr-GF', 'fr-GN', 'fr-GP', 'fr-GQ', 'fr-HT', 'fr-KM', 'fr-LU', 'fr-MA', 'fr-MC', 'fr-MF', 'fr-MG', 'fr-ML', 'fr-MQ', 'fr-MR', 'fr-MU', 'fr-NC', 'fr-NE', 'fr-PF', 'fr-PM', 'fr-RE', 'fr-RW', 'fr-SC', 'fr-SN', 'fr-SY', 'fr-TD', 'fr-TG', 'fr-TN', 'fr-VU', 'fr-WF', 'fr-YT'
-ur 'ur-IN'
-pt 'pt-AO', 'pt-CH', 'pt-CV', 'pt-GQ', 'pt-GW', 'pt-LU', 'pt-MO', 'pt-MZ', 'pt-PT', 'pt-ST', 'pt-TL'
-ru 'ru-BY', 'ru-KG', 'ru-KZ', 'ru-MD', 'ru-UA'
-id
-sw 'sw-CD', 'sw-KE', 'sw-UG'
-pa-Arab
-de 'de-AT', 'de-BE', 'de-CH', 'de-IT', 'de-LI', 'de-LU'
-ja
-te
-mr
-vi
-fa 'fa-AF'
-ta 'ta-LK', 'ta-MY', 'ta-SG'
-tr 'tr-CY'
-yue
-ko 'ko-KP'
-it 'it-CH', 'it-SM', 'it-VA'
-fil
-gu
-th
-kn
-ps
-zh-Hant 'zh-Hant-HK', 'zh-Hant-MO'
-ml
-or
-pl
-my
-pa
-pa-Guru
-am
-om 'om-KE'
-ha 'ha-GH', 'ha-NE'
-nl 'nl-AW', 'nl-BE', 'nl-BQ', 'nl-CW', 'nl-SR', 'nl-SX'
-uk
-uz
-uz-Latn
-yo 'yo-BJ'
-ms 'ms-BN', 'ms-SG'
-ig
-ro 'ro-MD'
-mg
-ne 'ne-IN'
-as
-so 'so-DJ', 'so-ET', 'so-KE'
-si
-km
-zu
-cs
-sv 'sv-AX', 'sv-FI'
-hu
-el 'el-CY'
-sn
-kk
-rw
-ckb 'ckb-IR'
-qu 'qu-BO', 'qu-EC'
-ak
-be
-ti 'ti-ER'
-az
-az-Latn
-af 'af-NA'
-ca 'ca-AD', 'ca-FR', 'ca-IT'
-sr-Latn 'sr-Latn-BA', 'sr-Latn-ME', 'sr-Latn-XK'
-ii
-he
-bg
-bm
-ki
-gsw 'gsw-FR', 'gsw-LI'
-sr
-sr-Cyrl 'sr-Cyrl-BA', 'sr-Cyrl-ME', 'sr-Cyrl-XK'
-ug
-zgh
-ff 'ff-CM', 'ff-GN', 'ff-MR'
-rn
-da 'da-GL'
-hr 'hr-BA'
-sq 'sq-MK', 'sq-XK'
-sk
-fi
-ks
-hy
-nb 'nb-SJ'
-luy
-lg
-lo
-bem
-kok
-luo
-uz-Cyrl
-ka
-ee 'ee-TG'
-mzn
-bs-Cyrl
-bs
-bs-Latn
-kln
-kam
-gl
-tzm
-dje
-kab
-bo 'bo-IN'
-shi-Latn
-shi
-shi-Tfng
-mn
-ln 'ln-AO', 'ln-CF', 'ln-CG'
-ky
-sg
-lt
-nyn
-guz
-cgg
-xog
-lrc 'lrc-IQ'
-mer
-lu
-sl
-teo 'teo-KE'
-brx
-nd
-mk
-uz-Arab
-mas 'mas-TZ'
-nn
-kde
-mfe
-lv
-seh
-mgh
-az-Cyrl
-ga
-eu
-yi
-ce
-et
-ksb
-bez
-ewo
-fy
-ebu
-nus
-ast
-asa
-ses
-os 'os-RU'
-br
-cy
-kea
-lag
-sah
-mt
-vun
-rof
-jmc
-lb
-dav
-dyo
-dz
-nnh
-is
-khq
-bas
-naq
-mua
-ksh
-saq
-se 'se-FI', 'se-SE'
-dua
-rwk
-mgo
-sbp
-to
-jgo
-ksf
-fo 'fo-DK'
-gd
-kl
-rm
-fur
-agq
-haw
-chr
-hsb
-wae
-nmg
-lkt
-twq
-dsb
-yav
-kw
-gv
-smn
-eo
-tl
-============ ================================================================
-
-
-Supported Calendars
-===================
-* Gregorian calendar.
-
-* Persian Jalali calendar. For more information, refer to `Persian Jalali Calendar `_.
-
- >>> from dateparser.calendars.jalali import JalaliCalendar
- >>> JalaliCalendar('جمعه سی ام اسفند ۱۳۸۷').get_date()
- {'date_obj': datetime.datetime(2009, 3, 20, 0, 0), 'period': 'day'}
-
-
-* Hijri/Islamic Calendar. For more information, refer to `Hijri Calendar `_.
-
- >>> from dateparser.calendars.hijri import HijriCalendar
- >>> HijriCalendar('17-01-1437 هـ 08:30 مساءً').get_date()
- {'date_obj': datetime.datetime(2015, 10, 30, 20, 30), 'period': 'day'}
-
-.. note:: `HijriCalendar` only works with Python ≥ 3.6.
-.. note:: For `Finnish` language, please specify `settings={'SKIP_TOKENS': []}` to correctly parse freshness dates.
-
-
-Install using following command to use calendars.
-
-.. tip::
- pip install dateparser[calendars]
+**dateparser** can be used for a wide variety of purposes,
+but it stands out when it comes to:
+
+Consuming data from different sources:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- **Scraping**: extract dates from different places with several
+ different formats and languages
+- **IoT**: consuming data coming from different sources with different
+ date formats
+- **Tooling**: consuming dates from different logs / sources
+- **Format transformations**: when transforming dates coming from
+ different files (PDF, CSV, etc.) to other formats (database, etc).
+
+Offering natural interaction with users:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- **Tooling and CLI**: allow users to write “3 days ago” to retrieve
+ information.
+- **Search engine**: allow people to search by date in an easier /
+ more natural format.
+- **Bots**: allow users to interact with a bot easily
+
+You may also like...
+--------------------
+
+- `price-parser `__ - A
+ small library for extracting price and currency from raw text
+ strings.
+- `number-parser `__ -
+ Library to convert numbers written in natural language to their
+ equivalent numeric forms.
+- `Scrapy `__ - Web crawling and web
+ scraping framework
+
+License
+-------
+
+`BSD
+3-Clause `__
diff --git a/docs/conf.py b/docs/conf.py
index 86e5d2e2a..bdb6d40f6 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -29,7 +29,7 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx', 'sphinx_rtd_theme']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@@ -65,7 +65,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'default'
+html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets)
# here, relative to this directory. They are copied after the builtin
@@ -117,4 +117,16 @@
]
# sphinx.ext.intersphinx confs
-intersphinx_mapping = {'python': ('https://docs.python.org/2', None)}
+intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
+
+
+html_theme_options = {
+ 'logo_only': True,
+ 'collapse_navigation': True,
+ 'sticky_navigation': True,
+ 'navigation_depth': 4,
+ 'includehidden': True,
+ 'titles_only': False
+}
+
+html_logo = "../artwork/dateparser-logo.png"
diff --git a/docs/contributing.rst b/docs/contributing.rst
index e582053ea..a00ca1a07 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -1 +1,2 @@
+.. _contributing:
.. include:: ../CONTRIBUTING.rst
diff --git a/docs/index.rst b/docs/index.rst
index 736090b70..e8ae4082e 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,26 +1,58 @@
-.. dateparser documentation master file, created by
- sphinx-quickstart on Tue Jul 9 22:26:36 2013.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
+====================================================
+dateparser -- python parser for human readable dates
+====================================================
+
+.. image:: https://img.shields.io/pypi/dm/dateparser
+ :target: https://pypi.python.org/pypi/dateparser
+ :alt: pypi downloads
+
+.. image:: https://img.shields.io/pypi/v/dateparser.svg
+ :target: https://pypi.python.org/pypi/dateparser
+ :alt: pypi version
+
+.. image:: https://codecov.io/gh/scrapinghub/dateparser/branch/master/graph/badge.svg
+ :target: https://codecov.io/gh/scrapinghub/dateparser
+ :alt: Code Coverage
+
+.. image:: https://img.shields.io/travis/scrapinghub/dateparser/master.svg
+ :target: https://travis-ci.org/scrapinghub/dateparser
+ :alt: travis build status
+
+.. image:: https://readthedocs.org/projects/dateparser/badge/?version=latest
+ :target: http://dateparser.readthedocs.org/en/latest/?badge=latest
+ :alt: Documentation Status
+
+
+`dateparser` provides modules to easily parse localized dates in almost
+any string formats commonly found on web pages.
-.. include:: ../README.rst
-.. include:: usage.rst
Documentation
=============
+This documentation is built automatically and can be found on
+`Read the Docs `_.
+
+
+.. include:: introduction.rst
+
+Indices and tables
+==================
+
+
Contents:
.. toctree::
:maxdepth: 2
+ introduction
installation
+ usage
+ supported_locales
contributing
authors
history
-Indices and tables
-==================
* :ref:`genindex`
* :ref:`modindex`
diff --git a/docs/introduction.rst b/docs/introduction.rst
new file mode 100644
index 000000000..c4ed2e3c8
--- /dev/null
+++ b/docs/introduction.rst
@@ -0,0 +1,246 @@
+==========================
+Introduction to dateparser
+==========================
+
+
+Features
+========
+
+* Generic parsing of dates in over 200 language locales plus numerous formats in a language agnostic fashion.
+* Generic parsing of relative dates like: ``'1 min ago'``, ``'2 weeks ago'``, ``'3 months, 1 week and 1 day ago'``, ``'in 2 days'``, ``'tomorrow'``.
+* Generic parsing of dates with time zones abbreviations or UTC offsets like: ``'August 14, 2015 EST'``, ``'July 4, 2013 PST'``, ``'21 July 2013 10:15 pm +0500'``.
+* Date lookup in longer texts.
+* Support for non-Gregorian calendar systems. See `Supported Calendars`_.
+* Extensive test coverage.
+
+
+Basic Usage
+===========
+
+The most straightforward way is to use the `dateparser.parse <#dateparser.parse>`_ function,
+that wraps around most of the functionality in the module.
+
+.. automodule:: dateparser
+ :members: parse
+
+
+Popular Formats
+---------------
+
+ >>> import dateparser
+ >>> dateparser.parse('12/12/12')
+ datetime.datetime(2012, 12, 12, 0, 0)
+ >>> dateparser.parse('Fri, 12 Dec 2014 10:55:50')
+ datetime.datetime(2014, 12, 12, 10, 55, 50)
+ >>> dateparser.parse('Martes 21 de Octubre de 2014') # Spanish (Tuesday 21 October 2014)
+ datetime.datetime(2014, 10, 21, 0, 0)
+ >>> dateparser.parse('Le 11 Décembre 2014 à 09:00') # French (11 December 2014 at 09:00)
+ datetime.datetime(2014, 12, 11, 9, 0)
+ >>> dateparser.parse('13 января 2015 г. в 13:34') # Russian (13 January 2015 at 13:34)
+ datetime.datetime(2015, 1, 13, 13, 34)
+ >>> dateparser.parse('1 เดือนตุลาคม 2005, 1:00 AM') # Thai (1 October 2005, 1:00 AM)
+ datetime.datetime(2005, 10, 1, 1, 0)
+
+This will try to parse a date from the given string, attempting to
+detect the language each time.
+
+You can specify the language(s), if known, using the ``languages`` argument. In this case, the given languages are used and language detection is skipped:
+
+ >>> dateparser.parse('2015, Ago 15, 1:08 pm', languages=['pt', 'es'])
+ datetime.datetime(2015, 8, 15, 13, 8)
+
+If you know the possible formats of the dates, you can
+use the ``date_formats`` argument:
+
+ >>> dateparser.parse('22 Décembre 2010', date_formats=['%d %B %Y'])
+ datetime.datetime(2010, 12, 22, 0, 0)
+
+
+Relative Dates
+--------------
+
+ >>> parse('1 hour ago')
+ datetime.datetime(2015, 5, 31, 23, 0)
+ >>> parse('Il ya 2 heures') # French (2 hours ago)
+ datetime.datetime(2015, 5, 31, 22, 0)
+ >>> parse('1 anno 2 mesi') # Italian (1 year 2 months)
+ datetime.datetime(2014, 4, 1, 0, 0)
+ >>> parse('yaklaşık 23 saat önce') # Turkish (23 hours ago)
+ datetime.datetime(2015, 5, 31, 1, 0)
+ >>> parse('Hace una semana') # Spanish (a week ago)
+ datetime.datetime(2015, 5, 25, 0, 0)
+ >>> parse('2小时前') # Chinese (2 hours ago)
+ datetime.datetime(2015, 5, 31, 22, 0)
+
+.. note:: Testing above code might return different values for you depending on your environment's current date and time.
+
+.. note:: Support for future relative dates needs a lot of improvement; we look forward to the community's contributions to make this part better. See ":ref:`contributing`".
+
+
+OOTB Language Based Date Order Preference
+-----------------------------------------
+
+ >>> # parsing ambiguous date
+ >>> parse('02-03-2016') # assumes english language, uses MDY date order
+ datetime.datetime(2016, 2, 3, 0, 0)
+ >>> parse('le 02-03-2016') # detects french, uses DMY date order
+ datetime.datetime(2016, 3, 2, 0, 0)
+
+.. note:: Ordering is not locale based, so do not expect `DMY` order for UK/Australian English. You can specify the date order in that case using `settings`, as follows:
+
+ >>> parse('18-12-15 06:00', settings={'DATE_ORDER': 'DMY'})
+ datetime.datetime(2015, 12, 18, 6, 0)
+
+For more on date order, please look at Settings.
+
+
+
+Timezone and UTC Offset
+-----------------------
+
+By default, `dateparser` returns tzaware `datetime` if timezone is present in date string. Otherwise, it returns a naive `datetime` object.
+
+ >>> parse('January 12, 2012 10:00 PM EST')
+ datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+
+ >>> parse('January 12, 2012 10:00 PM -0500')
+ datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+
+ >>> parse('2 hours ago EST')
+ datetime.datetime(2017, 3, 10, 15, 55, 39, 579667, tzinfo=)
+
+ >>> parse('2 hours ago -0500')
+ datetime.datetime(2017, 3, 10, 15, 59, 30, 193431, tzinfo=)
+
+ If date has no timezone name/abbreviation or offset, you can specify it using `TIMEZONE` setting.
+
+ >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern'})
+ datetime.datetime(2012, 1, 12, 22, 0)
+
+ >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': '+0500'})
+ datetime.datetime(2012, 1, 12, 22, 0)
+
+The `TIMEZONE` option may not be useful alone, as it only attaches the given timezone to the
+resultant `datetime` object. But it can be useful in cases where you want conversions from and to different
+timezones, or when you simply want a tzaware date with the given timezone info attached.
+
+ >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern', 'RETURN_AS_TIMEZONE_AWARE': True})
+ datetime.datetime(2012, 1, 12, 22, 0, tzinfo=)
+
+
+ >>> parse('10:00 am', settings={'TIMEZONE': 'EST', 'TO_TIMEZONE': 'EDT'})
+ datetime.datetime(2016, 9, 25, 11, 0)
+
+Some more use cases for conversion of timezones.
+
+ >>> parse('10:00 am EST', settings={'TO_TIMEZONE': 'EDT'}) # date string has timezone info
+ datetime.datetime(2017, 3, 12, 11, 0, tzinfo=)
+
+ >>> parse('now EST', settings={'TO_TIMEZONE': 'UTC'}) # relative dates
+ datetime.datetime(2017, 3, 10, 23, 24, 47, 371823, tzinfo=)
+
+If no timezone is present in the date string or defined in `settings`, you can still
+return a tzaware `datetime`. This is especially useful for relative dates, when it is uncertain
+which timezone the relative base is in.
+
+ >>> parse('2 minutes ago', settings={'RETURN_AS_TIMEZONE_AWARE': True})
+ datetime.datetime(2017, 3, 11, 4, 25, 24, 152670, tzinfo=)
+
+If you want to compute relative dates in UTC instead of the system's default local timezone, you can use the `TIMEZONE` setting.
+
+ >>> parse('4 minutes ago', settings={'TIMEZONE': 'UTC'})
+ datetime.datetime(2017, 3, 10, 23, 27, 59, 647248, tzinfo=)
+
+.. note:: When a timezone is both present in the string and specified using `settings`, the string is parsed into a tzaware representation and then converted to the timezone specified in `settings`.
+
+ >>> parse('10:40 pm PKT', settings={'TIMEZONE': 'UTC'})
+ datetime.datetime(2017, 3, 12, 17, 40, tzinfo=)
+
+ >>> parse('20 mins ago EST', settings={'TIMEZONE': 'UTC'})
+ datetime.datetime(2017, 3, 12, 21, 16, 0, 885091, tzinfo=)
+
+For more on timezones, please look at Settings.
+
+
+Incomplete Dates
+----------------
+
+ >>> from dateparser import parse
+ >>> parse('December 2015') # default behavior
+ datetime.datetime(2015, 12, 16, 0, 0)
+ >>> parse('December 2015', settings={'PREFER_DAY_OF_MONTH': 'last'})
+ datetime.datetime(2015, 12, 31, 0, 0)
+ >>> parse('December 2015', settings={'PREFER_DAY_OF_MONTH': 'first'})
+ datetime.datetime(2015, 12, 1, 0, 0)
+
+ >>> parse('March')
+ datetime.datetime(2015, 3, 16, 0, 0)
+ >>> parse('March', settings={'PREFER_DATES_FROM': 'future'})
+ datetime.datetime(2016, 3, 16, 0, 0)
+ >>> # parsing with preference set for 'past'
+ >>> parse('August', settings={'PREFER_DATES_FROM': 'past'})
+ datetime.datetime(2015, 8, 15, 0, 0)
+
+You can also ignore parsing incomplete dates altogether by setting `STRICT_PARSING` flag as follows:
+
+ >>> parse('December 2015', settings={'STRICT_PARSING': True})
+ None
+
+For more on handling incomplete dates, please look at Settings.
+
+
+Search for Dates in Longer Chunks of Text
+-----------------------------------------
+
+You can extract dates from longer strings of text. They are returned as a list of tuples, each containing the text chunk with the date and the parsed datetime object.
+
+.. automodule:: dateparser.search
+ :members: search_dates
+
+Dependencies
+============
+
+`dateparser` relies on the following libraries:
+
+ * dateutil_'s module ``relativedelta`` for its freshness parser.
+ * convertdate_ to convert *Jalali* dates to *Gregorian*.
+ * hijri-converter_ to convert *Hijri* dates to *Gregorian*.
+ * tzlocal_ to reliably get local timezone.
+ * ruamel.yaml_ (optional) for operations on language files.
+
+.. _dateutil: https://pypi.python.org/pypi/python-dateutil
+.. _convertdate: https://pypi.python.org/pypi/convertdate
+.. _hijri-converter: https://pypi.python.org/pypi/hijri-converter
+.. _tzlocal: https://pypi.python.org/pypi/tzlocal
+.. _ruamel.yaml: https://pypi.python.org/pypi/ruamel.yaml
+
+Supported languages and locales
+===============================
+You can check the supported locales by visiting the ":ref:`supported-locales`" section.
+
+
+Supported Calendars
+===================
+* Gregorian calendar.
+
+* Persian Jalali calendar. For more information, refer to `Persian Jalali Calendar `_.
+
+ >>> from dateparser.calendars.jalali import JalaliCalendar
+ >>> JalaliCalendar('جمعه سی ام اسفند ۱۳۸۷').get_date()
+ {'date_obj': datetime.datetime(2009, 3, 20, 0, 0), 'period': 'day'}
+
+
+* Hijri/Islamic Calendar. For more information, refer to `Hijri Calendar `_.
+
+ >>> from dateparser.calendars.hijri import HijriCalendar
+ >>> HijriCalendar('17-01-1437 هـ 08:30 مساءً').get_date()
+ {'date_obj': datetime.datetime(2015, 10, 30, 20, 30), 'period': 'day'}
+
+.. note:: `HijriCalendar` only works with Python ≥ 3.6.
+.. note:: For `Finnish` language, please specify `settings={'SKIP_TOKENS': []}` to correctly parse freshness dates.
+
+
+Install using the following command to use calendars.
+
+.. tip::
+ pip install dateparser[calendars]
diff --git a/docs/supported_locales.rst b/docs/supported_locales.rst
new file mode 100644
index 000000000..dbd5cb615
--- /dev/null
+++ b/docs/supported_locales.rst
@@ -0,0 +1,214 @@
+.. _supported-locales:
+
+Supported languages and locales
+===============================
+
+============ ================================================================
+ Language Locales
+============ ================================================================
+en 'en-001', 'en-150', 'en-AG', 'en-AI', 'en-AS', 'en-AT', 'en-AU', 'en-BB', 'en-BE', 'en-BI', 'en-BM', 'en-BS', 'en-BW', 'en-BZ', 'en-CA', 'en-CC', 'en-CH', 'en-CK', 'en-CM', 'en-CX', 'en-CY', 'en-DE', 'en-DG', 'en-DK', 'en-DM', 'en-ER', 'en-FI', 'en-FJ', 'en-FK', 'en-FM', 'en-GB', 'en-GD', 'en-GG', 'en-GH', 'en-GI', 'en-GM', 'en-GU', 'en-GY', 'en-HK', 'en-IE', 'en-IL', 'en-IM', 'en-IN', 'en-IO', 'en-JE', 'en-JM', 'en-KE', 'en-KI', 'en-KN', 'en-KY', 'en-LC', 'en-LR', 'en-LS', 'en-MG', 'en-MH', 'en-MO', 'en-MP', 'en-MS', 'en-MT', 'en-MU', 'en-MW', 'en-MY', 'en-NA', 'en-NF', 'en-NG', 'en-NL', 'en-NR', 'en-NU', 'en-NZ', 'en-PG', 'en-PH', 'en-PK', 'en-PN', 'en-PR', 'en-PW', 'en-RW', 'en-SB', 'en-SC', 'en-SD', 'en-SE', 'en-SG', 'en-SH', 'en-SI', 'en-SL', 'en-SS', 'en-SX', 'en-SZ', 'en-TC', 'en-TK', 'en-TO', 'en-TT', 'en-TV', 'en-TZ', 'en-UG', 'en-UM', 'en-VC', 'en-VG', 'en-VI', 'en-VU', 'en-WS', 'en-ZA', 'en-ZM', 'en-ZW'
+zh
+zh-Hans 'zh-Hans-HK', 'zh-Hans-MO', 'zh-Hans-SG'
+hi
+es 'es-419', 'es-AR', 'es-BO', 'es-BR', 'es-BZ', 'es-CL', 'es-CO', 'es-CR', 'es-CU', 'es-DO', 'es-EA', 'es-EC', 'es-GQ', 'es-GT', 'es-HN', 'es-IC', 'es-MX', 'es-NI', 'es-PA', 'es-PE', 'es-PH', 'es-PR', 'es-PY', 'es-SV', 'es-US', 'es-UY', 'es-VE'
+ar 'ar-AE', 'ar-BH', 'ar-DJ', 'ar-DZ', 'ar-EG', 'ar-EH', 'ar-ER', 'ar-IL', 'ar-IQ', 'ar-JO', 'ar-KM', 'ar-KW', 'ar-LB', 'ar-LY', 'ar-MA', 'ar-MR', 'ar-OM', 'ar-PS', 'ar-QA', 'ar-SA', 'ar-SD', 'ar-SO', 'ar-SS', 'ar-SY', 'ar-TD', 'ar-TN', 'ar-YE'
+bn 'bn-IN'
+fr 'fr-BE', 'fr-BF', 'fr-BI', 'fr-BJ', 'fr-BL', 'fr-CA', 'fr-CD', 'fr-CF', 'fr-CG', 'fr-CH', 'fr-CI', 'fr-CM', 'fr-DJ', 'fr-DZ', 'fr-GA', 'fr-GF', 'fr-GN', 'fr-GP', 'fr-GQ', 'fr-HT', 'fr-KM', 'fr-LU', 'fr-MA', 'fr-MC', 'fr-MF', 'fr-MG', 'fr-ML', 'fr-MQ', 'fr-MR', 'fr-MU', 'fr-NC', 'fr-NE', 'fr-PF', 'fr-PM', 'fr-RE', 'fr-RW', 'fr-SC', 'fr-SN', 'fr-SY', 'fr-TD', 'fr-TG', 'fr-TN', 'fr-VU', 'fr-WF', 'fr-YT'
+ur 'ur-IN'
+pt 'pt-AO', 'pt-CH', 'pt-CV', 'pt-GQ', 'pt-GW', 'pt-LU', 'pt-MO', 'pt-MZ', 'pt-PT', 'pt-ST', 'pt-TL'
+ru 'ru-BY', 'ru-KG', 'ru-KZ', 'ru-MD', 'ru-UA'
+id
+sw 'sw-CD', 'sw-KE', 'sw-UG'
+pa-Arab
+de 'de-AT', 'de-BE', 'de-CH', 'de-IT', 'de-LI', 'de-LU'
+ja
+te
+mr
+vi
+fa 'fa-AF'
+ta 'ta-LK', 'ta-MY', 'ta-SG'
+tr 'tr-CY'
+yue
+ko 'ko-KP'
+it 'it-CH', 'it-SM', 'it-VA'
+fil
+gu
+th
+kn
+ps
+zh-Hant 'zh-Hant-HK', 'zh-Hant-MO'
+ml
+or
+pl
+my
+pa
+pa-Guru
+am
+om 'om-KE'
+ha 'ha-GH', 'ha-NE'
+nl 'nl-AW', 'nl-BE', 'nl-BQ', 'nl-CW', 'nl-SR', 'nl-SX'
+uk
+uz
+uz-Latn
+yo 'yo-BJ'
+ms 'ms-BN', 'ms-SG'
+ig
+ro 'ro-MD'
+mg
+ne 'ne-IN'
+as
+so 'so-DJ', 'so-ET', 'so-KE'
+si
+km
+zu
+cs
+sv 'sv-AX', 'sv-FI'
+hu
+el 'el-CY'
+sn
+kk
+rw
+ckb 'ckb-IR'
+qu 'qu-BO', 'qu-EC'
+ak
+be
+ti 'ti-ER'
+az
+az-Latn
+af 'af-NA'
+ca 'ca-AD', 'ca-FR', 'ca-IT'
+sr-Latn 'sr-Latn-BA', 'sr-Latn-ME', 'sr-Latn-XK'
+ii
+he
+bg
+bm
+ki
+gsw 'gsw-FR', 'gsw-LI'
+sr
+sr-Cyrl 'sr-Cyrl-BA', 'sr-Cyrl-ME', 'sr-Cyrl-XK'
+ug
+zgh
+ff 'ff-CM', 'ff-GN', 'ff-MR'
+rn
+da 'da-GL'
+hr 'hr-BA'
+sq 'sq-MK', 'sq-XK'
+sk
+fi
+ks
+hy
+nb 'nb-SJ'
+luy
+lg
+lo
+bem
+kok
+luo
+uz-Cyrl
+ka
+ee 'ee-TG'
+mzn
+bs-Cyrl
+bs
+bs-Latn
+kln
+kam
+gl
+tzm
+dje
+kab
+bo 'bo-IN'
+shi-Latn
+shi
+shi-Tfng
+mn
+ln 'ln-AO', 'ln-CF', 'ln-CG'
+ky
+sg
+lt
+nyn
+guz
+cgg
+xog
+lrc 'lrc-IQ'
+mer
+lu
+sl
+teo 'teo-KE'
+brx
+nd
+mk
+uz-Arab
+mas 'mas-TZ'
+nn
+kde
+mfe
+lv
+seh
+mgh
+az-Cyrl
+ga
+eu
+yi
+ce
+et
+ksb
+bez
+ewo
+fy
+ebu
+nus
+ast
+asa
+ses
+os 'os-RU'
+br
+cy
+kea
+lag
+sah
+mt
+vun
+rof
+jmc
+lb
+dav
+dyo
+dz
+nnh
+is
+khq
+bas
+naq
+mua
+ksh
+saq
+se 'se-FI', 'se-SE'
+dua
+rwk
+mgo
+sbp
+to
+jgo
+ksf
+fo 'fo-DK'
+gd
+kl
+rm
+fur
+agq
+haw
+chr
+hsb
+wae
+nmg
+lkt
+twq
+dsb
+yav
+kw
+gv
+smn
+eo
+tl
+============ ================================================================
diff --git a/setup.py b/setup.py
index 964a806a5..9cf2f3099 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,10 @@
import re
from setuptools import setup, find_packages
-open_as_utf = lambda x: open(x, encoding='utf-8')
-
-(__version__, ) = re.findall(r"__version__.*\s*=\s*[']([^']+)[']",
- open('dateparser/__init__.py').read())
-
-readme = re.sub(r':members:.+|..\sautomodule::.+|:class:|:func:', '', open_as_utf('README.rst').read())
-readme = re.sub(r'`Settings`_', '`Settings`', readme)
-readme = re.sub(r'`Contributing`_', '`Contributing`', readme)
-history = re.sub(r':mod:|:class:|:func:', '', open_as_utf('HISTORY.rst').read())
+__version__ = re.search(r"__version__.*\s*=\s*[']([^']+)[']", open('dateparser/__init__.py', encoding='utf-8').read())[1]  # search, not match: the assignment need not be the first thing in the file
+introduction = re.sub(r':members:.+|..\sautomodule::.+|:class:|:func:|:ref:', '', open('docs/introduction.rst', encoding='utf-8').read())  # explicit UTF-8: the locale default may fail on non-ASCII docs
+history = re.sub(r':mod:|:class:|:func:', '', open('HISTORY.rst', encoding='utf-8').read())  # strip Sphinx-only roles so the text renders as plain RST
test_requirements = open('tests/requirements.txt').read().splitlines()
@@ -18,7 +12,7 @@
name='dateparser',
version=__version__,
description='Date parsing library designed to parse dates from HTML pages',
- long_description=readme + '\n\n' + history,
+ long_description=introduction + '\n\n' + history,
author='Scrapinghub',
author_email='info@scrapinghub.com',
url='https://github.com/scrapinghub/dateparser',
diff --git a/tox.ini b/tox.ini
index 22c5282bd..2b4e86b07 100644
--- a/tox.ini
+++ b/tox.ini
@@ -23,5 +23,6 @@ changedir = docs
deps =
{[testenv]deps}
sphinx
+ sphinx-rtd-theme
commands =
sphinx-build -b html . {envtmpdir}/html