From 1b3cb79ce4223ebe6e68bc6ca202f11e69774c9f Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 4 May 2021 13:21:06 -0700 Subject: [PATCH 01/10] Feat: begin session tracking in epidata for requests --- src/client/delphi_epidata.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index cac9b285d..8d88c8a51 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -63,9 +63,11 @@ def _request(params): """ try: # API call - req = requests.get(Epidata.BASE_URL, params, headers=_HEADERS) - if req.status_code == 414: - req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) + with requests.Session() as session: + req = session.request('get', Epidata.BASE_URL, params, headers=_HEADERS) + # req = requests.get(Epidata.BASE_URL, params, headers=_HEADERS) + if req.status_code == 414: + req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) return req.json() except Exception as e: # Something broke From bfe961166b1770162c9ad3f06589a217e24fd144 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 5 May 2021 14:09:07 -0700 Subject: [PATCH 02/10] Feat: more caching --- src/client/delphi_epidata.py | 28 +++++++++---- tests/client/test_delphi_epidata.py | 62 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index 8d88c8a51..32144528f 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -10,8 +10,10 @@ # External modules import requests +import requests_cache import asyncio import warnings +from typing import Union, Iterable, Tuple, List from aiohttp import ClientSession, TCPConnector from pkg_resources import get_distribution, DistributionNotFound @@ -54,7 +56,7 @@ def _list(values): # Helper function to request and parse epidata @staticmethod - def _request(params): + def _request(params, cache_timeout: Union[int, None] = None): """Request and parse epidata. We default to GET since it has better caching and logging @@ -63,11 +65,18 @@ def _request(params): """ try: # API call - with requests.Session() as session: - req = session.request('get', Epidata.BASE_URL, params, headers=_HEADERS) - # req = requests.get(Epidata.BASE_URL, params, headers=_HEADERS) - if req.status_code == 414: - req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) + # Use cache if cache_timeout is set + if cache_timeout is not None: + session = requests_cache.CachedSession( + 'demo_cache', expire_after=cache_timeout + ) + else: + session = requests.Session() + req = session.request('get', Epidata.BASE_URL, params, headers=_HEADERS) + # Fallback to requests if we have to use POST + if req.status_code == 414: + req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) + session.close() return req.json() except Exception as e: # Something broke @@ -603,8 +612,13 @@ def covidcast( if 'format' in kwargs: params['format'] = kwargs['format'] + if 'cache_timeout' in kwargs: + cache_timeout = kwargs['cache_timeout'] + else: + cache_timeout = None + # Make the API call - return Epidata._request(params) + return Epidata._request(params, cache_timeout) # Fetch Delphi's COVID-19 Surveillance Streams metadata @staticmethod diff --git a/tests/client/test_delphi_epidata.py b/tests/client/test_delphi_epidata.py index ad60f3461..46b863842 100644 --- a/tests/client/test_delphi_epidata.py +++ b/tests/client/test_delphi_epidata.py @@ -2,10 +2,15 @@ # standard library import unittest +import time +from datetime import date + +import pandas as pd # py3tester coverage target __test_target__ = 'delphi.epidata.client.delphi_epidata' +from delphi.epidata.client.delphi_epidata import Epidata class UnitTests(unittest.TestCase): """Basic unit tests.""" @@ -14,3 +19,60 @@ class UnitTests(unittest.TestCase): # the target file can't be loaded. In effect, it's a syntax checker. def test_syntax(self): pass + + + +def test_requests_caching(): + data_source = "chng" + signal = "smoothed_outpatient_covid" + time_type = "day" + start_day = date(2021, 1, 2) + end_day = date(2021, 1, 12) + # day_str = "-".join( + # [date.strftime("%Y%m%d") for date in pd.date_range(start_day, end_day)] + # ) + day_str = date(2021, 4, 15).strftime("%Y%m%d") + day_strs = [date.strftime("%Y%m%d") for date in pd.date_range(start_day, end_day)] + geo_type = "state" + + then = int(time.time()) + dfs = [] + breakpoint() + for day_str in day_strs: + res = Epidata.covidcast( + data_source, signal, time_type=time_type, + geo_type=geo_type, time_values=day_str, + geo_value="ca", cache_timeout=20 + ) + df = pd.DataFrame.from_dict(res['epidata']) + dfs.append(df) + df1 = pd.concat(dfs) + now = int(time.time()) + time_delta1 = now - then + + then = int(time.time()) + for i in range(20): + df2 = Epidata.covidcast( + data_source, signal, time_type=time_type, + geo_type=geo_type, time_values=day_str, + geo_value="*", cache_timeout=20 + ) + now = int(time.time()) + time_delta2 = now - then + + time.sleep(20) + then = int(time.time()) + for i in range(20): + df3 = Epidata.covidcast( + data_source, signal, time_type=time_type, + geo_type=geo_type, time_values=day_str, + geo_value="*", cache_timeout=20 + ) + now = int(time.time()) + time_delta3 = now - then + + breakpoint() + print("Rows retrieved: {1}, Time taken: {0}".format(df1.size, time_delta1)) + print("Rows retrieved: {1}, Time taken: {0}".format(df2.size, time_delta2)) + print("Rows retrieved: {1}, Time taken: {0}".format(df3.size, time_delta3)) + 2 From c912ebfd12ffee853b6504daff45c4131e9d0af7 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Thu, 13 May 2021 18:39:16 -0700 Subject: [PATCH 03/10] Epidata cache: update tests --- integrations/client/test_delphi_epidata.py | 22 ++++-- tests/client/test_delphi_epidata.py | 78 ---------------------- 2 files changed, 16 insertions(+), 84 deletions(-) delete mode 100644 tests/client/test_delphi_epidata.py diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index c63a8e1be..d9ad6aaa5 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -285,19 +285,29 @@ def test_covidcast(self): self.assertEqual(response_1, {'message': 'no results', 'result': -2}) @patch('requests.post') - @patch('requests.get') - def test_request_method(self, get, post): + @patch('requests_cache.CachedSession') + @patch('requests.Session') + def test_request_method(self, _Session, _CachedSession, post): """Test that a GET request is default and POST is used if a 414 is returned.""" - with self.subTest(name='get request'): + with self.subTest(name='get request, no cache'): + Session = MagicMock() + _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - get.assert_called_once() + Session.request.assert_called_once() + post.assert_not_called() + with self.subTest(name='get request, cache'): + CachedSession = MagicMock() + _CachedSession.return_value = CachedSession + Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234', cache_timeout=5) + CachedSession.request.assert_called_once() post.assert_not_called() with self.subTest(name='post request'): mock_response = MagicMock() mock_response.status_code = 414 - get.return_value = mock_response + Session = MagicMock() + Session.request.return_value = mock_response + _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - self.assertEqual(get.call_count, 2) # one from post test and one from get test post.assert_called_once() def test_geo_value(self): diff --git a/tests/client/test_delphi_epidata.py b/tests/client/test_delphi_epidata.py deleted file mode 100644 index 46b863842..000000000 --- a/tests/client/test_delphi_epidata.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Unit tests for delphi_epidata.py.""" - -# standard library -import unittest -import time -from datetime import date - -import pandas as pd - -# py3tester coverage target -__test_target__ = 'delphi.epidata.client.delphi_epidata' - -from delphi.epidata.client.delphi_epidata import Epidata - -class UnitTests(unittest.TestCase): - """Basic unit tests.""" - - # TODO: Unit tests still need to be written. This no-op test will pass unless - # the target file can't be loaded. In effect, it's a syntax checker. - def test_syntax(self): - pass - - - -def test_requests_caching(): - data_source = "chng" - signal = "smoothed_outpatient_covid" - time_type = "day" - start_day = date(2021, 1, 2) - end_day = date(2021, 1, 12) - # day_str = "-".join( - # [date.strftime("%Y%m%d") for date in pd.date_range(start_day, end_day)] - # ) - day_str = date(2021, 4, 15).strftime("%Y%m%d") - day_strs = [date.strftime("%Y%m%d") for date in pd.date_range(start_day, end_day)] - geo_type = "state" - - then = int(time.time()) - dfs = [] - breakpoint() - for day_str in day_strs: - res = Epidata.covidcast( - data_source, signal, time_type=time_type, - geo_type=geo_type, time_values=day_str, - geo_value="ca", cache_timeout=20 - ) - df = pd.DataFrame.from_dict(res['epidata']) - dfs.append(df) - df1 = pd.concat(dfs) - now = int(time.time()) - time_delta1 = now - then - - then = int(time.time()) - for i in range(20): - df2 = Epidata.covidcast( - data_source, signal, time_type=time_type, - geo_type=geo_type, time_values=day_str, - geo_value="*", cache_timeout=20 - ) - now = int(time.time()) - time_delta2 = now - then - - time.sleep(20) - then = int(time.time()) - for i in range(20): - df3 = Epidata.covidcast( - data_source, signal, time_type=time_type, - geo_type=geo_type, time_values=day_str, - geo_value="*", cache_timeout=20 - ) - now = int(time.time()) - time_delta3 = now - then - - breakpoint() - print("Rows retrieved: {1}, Time taken: {0}".format(df1.size, time_delta1)) - print("Rows retrieved: {1}, Time taken: {0}".format(df2.size, time_delta2)) - print("Rows retrieved: {1}, Time taken: {0}".format(df3.size, time_delta3)) - 2 From bac7ef866eb6dac50d19b2e3d9002152b3d33038 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 14 May 2021 14:44:45 -0700 Subject: [PATCH 04/10] Add delphi-epidata test framework back --- tests/client/test_delphi_epidata.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/client/test_delphi_epidata.py diff --git a/tests/client/test_delphi_epidata.py b/tests/client/test_delphi_epidata.py new file mode 100644 index 000000000..47c1f1eee --- /dev/null +++ b/tests/client/test_delphi_epidata.py @@ -0,0 +1,21 @@ +"""Unit tests for delphi_epidata.py.""" + +# standard library +import unittest +import time +from datetime import date + +import pandas as pd + +# py3tester coverage target +__test_target__ = 'delphi.epidata.client.delphi_epidata' + +from delphi.epidata.client.delphi_epidata import Epidata + +class UnitTests(unittest.TestCase): + """Basic unit tests.""" + + # TODO: Unit tests still need to be written. This no-op test will pass unless + # the target file can't be loaded. In effect, it's a syntax checker. + def test_syntax(self): + pass From a7c4c8028b3a63d117caa732346b937dbca8fc05 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Tue, 18 May 2021 15:05:34 -0700 Subject: [PATCH 05/10] Client cache: consistent session use, misc cleaning --- integrations/client/test_delphi_epidata.py | 7 ++----- src/client/delphi_epidata.py | 24 ++++++++++------------ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index d9ad6aaa5..b18508879 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -284,23 +284,20 @@ def test_covidcast(self): # check result self.assertEqual(response_1, {'message': 'no results', 'result': -2}) - @patch('requests.post') @patch('requests_cache.CachedSession') @patch('requests.Session') - def test_request_method(self, _Session, _CachedSession, post): + def test_request_method(self, _Session, _CachedSession): """Test that a GET request is default and POST is used if a 414 is returned.""" with self.subTest(name='get request, no cache'): Session = MagicMock() _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') Session.request.assert_called_once() - post.assert_not_called() with self.subTest(name='get request, cache'): CachedSession = MagicMock() _CachedSession.return_value = CachedSession Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234', cache_timeout=5) CachedSession.request.assert_called_once() - post.assert_not_called() with self.subTest(name='post request'): mock_response = MagicMock() mock_response.status_code = 414 @@ -308,7 +305,7 @@ def test_request_method(self, _Session, _CachedSession, post): Session.request.return_value = mock_response _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - post.assert_called_once() + assert Session.request.call_count == 2 def test_geo_value(self): """test different variants of geo types: single, *, multi.""" diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index 32144528f..a2bb50246 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -9,15 +9,17 @@ """ # External modules -import requests -import requests_cache +from typing import Union, Optional +from datetime import timedelta, datetime +from requests import Session +from requests_cache import CachedSession import asyncio -import warnings -from typing import Union, Iterable, Tuple, List from aiohttp import ClientSession, TCPConnector from pkg_resources import get_distribution, DistributionNotFound +CacheTime = Union[int, datetime, timedelta] + # Obtain package version for the user-agent. Uses the installed version by # preference, even if you've installed it and then use this script independently # by accident. @@ -56,7 +58,7 @@ def _list(values): # Helper function to request and parse epidata @staticmethod - def _request(params, cache_timeout: Union[int, None] = None): + def _request(params, cache_timeout: Optional[CacheTime] = None): """Request and parse epidata. We default to GET since it has better caching and logging @@ -65,17 +67,13 @@ def _request(params, cache_timeout: Union[int, None] = None): """ try: # API call - # Use cache if cache_timeout is set - if cache_timeout is not None: - session = requests_cache.CachedSession( - 'demo_cache', expire_after=cache_timeout - ) - else: - session = requests.Session() + session = Session() if cache_timeout is None else CachedSession( + 'covidcast_cache', expire_after=cache_timeout + ) req = session.request('get', Epidata.BASE_URL, params, headers=_HEADERS) # Fallback to requests if we have to use POST if req.status_code == 414: - req = requests.post(Epidata.BASE_URL, params, headers=_HEADERS) + req = session.request('post', Epidata.BASE_URL, params, headers=_HEADERS) session.close() return req.json() except Exception as e: From 894d8b5e6d0ce8a487a260eece455ea42688eabd Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 19 May 2021 00:01:01 -0700 Subject: [PATCH 06/10] Client cache: update tests, add cache function argument --- integrations/client/test_delphi_epidata.py | 11 +++++++---- src/client/delphi_epidata.py | 7 +------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index b18508879..9736ea6d0 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -2,7 +2,7 @@ # standard library import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, call # third party from aiohttp.client_exceptions import ClientResponseError @@ -292,12 +292,14 @@ def test_request_method(self, _Session, _CachedSession): Session = MagicMock() _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - Session.request.assert_called_once() + assert call('get') in Session.request.call_args_list() + assert call('post') not in Session.request.call_args_list() with self.subTest(name='get request, cache'): CachedSession = MagicMock() _CachedSession.return_value = CachedSession Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234', cache_timeout=5) - CachedSession.request.assert_called_once() + assert call('get') in CachedSession.request.call_args_list() + assert call('post') not in CachedSession.request.call_args_list() with self.subTest(name='post request'): mock_response = MagicMock() mock_response.status_code = 414 @@ -305,7 +307,8 @@ def test_request_method(self, _Session, _CachedSession): Session.request.return_value = mock_response _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - assert Session.request.call_count == 2 + assert call('get') in Session.request.call_args_list() + assert call('post') in Session.request.call_args_list() def test_geo_value(self): """test different variants of geo types: single, *, multi.""" diff --git a/src/client/delphi_epidata.py b/src/client/delphi_epidata.py index a2bb50246..0ce28fa56 100644 --- a/src/client/delphi_epidata.py +++ b/src/client/delphi_epidata.py @@ -576,7 +576,7 @@ def meta(): @staticmethod def covidcast( data_source, signals, time_type, geo_type, - time_values, geo_value, as_of=None, issues=None, lag=None, **kwargs): + time_values, geo_value, as_of=None, issues=None, lag=None, cache_timeout=None, **kwargs): """Fetch Delphi's COVID-19 Surveillance Streams""" # also support old parameter name if signals is None and 'signal' in kwargs: @@ -610,11 +610,6 @@ def covidcast( if 'format' in kwargs: params['format'] = kwargs['format'] - if 'cache_timeout' in kwargs: - cache_timeout = kwargs['cache_timeout'] - else: - cache_timeout = None - # Make the API call return Epidata._request(params, cache_timeout) From ec5bdd59a3fe8dab605b4e980f41a3f99e599eb5 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 19 May 2021 00:03:04 -0700 Subject: [PATCH 07/10] Client cache: revert tests/client/test_delphi_epidata --- tests/client/test_delphi_epidata.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/client/test_delphi_epidata.py b/tests/client/test_delphi_epidata.py index 47c1f1eee..30f488292 100644 --- a/tests/client/test_delphi_epidata.py +++ b/tests/client/test_delphi_epidata.py @@ -2,16 +2,10 @@ # standard library import unittest -import time -from datetime import date - -import pandas as pd # py3tester coverage target __test_target__ = 'delphi.epidata.client.delphi_epidata' -from delphi.epidata.client.delphi_epidata import Epidata - class UnitTests(unittest.TestCase): """Basic unit tests.""" From 11164f5c7378c253a6ad58f885eba1228a87d067 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 19 May 2021 00:05:20 -0700 Subject: [PATCH 08/10] Client cache: revert more --- tests/client/test_delphi_epidata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/client/test_delphi_epidata.py b/tests/client/test_delphi_epidata.py index 30f488292..ad60f3461 100644 --- a/tests/client/test_delphi_epidata.py +++ b/tests/client/test_delphi_epidata.py @@ -6,6 +6,7 @@ # py3tester coverage target __test_target__ = 'delphi.epidata.client.delphi_epidata' + class UnitTests(unittest.TestCase): """Basic unit tests.""" From 5700a1666f58aa91b82afee9623275ae8c488cc9 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 19 May 2021 11:56:35 -0700 Subject: [PATCH 09/10] Client cache: add requests-cache to web_python Dockerfile --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9cae4fa02..d9805f186 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,9 @@ Flask==1.1.2 SQLAlchemy==1.3.22 mysqlclient==2.0.2 +newrelic python-dotenv==0.15.0 orjson==3.4.7 pandas==1.2.3 +requests-cache scipy==1.6.2 -newrelic From 8c780961598d1a735c74474c300266dfc0f2847c Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 19 May 2021 12:55:43 -0700 Subject: [PATCH 10/10] Client cache: fix call_args_list --- integrations/client/test_delphi_epidata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/integrations/client/test_delphi_epidata.py b/integrations/client/test_delphi_epidata.py index 9736ea6d0..af8b384ab 100644 --- a/integrations/client/test_delphi_epidata.py +++ b/integrations/client/test_delphi_epidata.py @@ -292,14 +292,14 @@ def test_request_method(self, _Session, _CachedSession): Session = MagicMock() _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - assert call('get') in Session.request.call_args_list() - assert call('post') not in Session.request.call_args_list() + assert call('get') in Session.request.call_args_list + assert call('post') not in Session.request.call_args_list with self.subTest(name='get request, cache'): CachedSession = MagicMock() _CachedSession.return_value = CachedSession Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234', cache_timeout=5) - assert call('get') in CachedSession.request.call_args_list() - assert call('post') not in CachedSession.request.call_args_list() + assert call('get') in CachedSession.request.call_args_list + assert call('post') not in CachedSession.request.call_args_list with self.subTest(name='post request'): mock_response = MagicMock() mock_response.status_code = 414 @@ -307,8 +307,8 @@ def test_request_method(self, _Session, _CachedSession): Session.request.return_value = mock_response _Session.return_value = Session Epidata.covidcast('src', 'sig', 'day', 'county', 20200414, '01234') - assert call('get') in Session.request.call_args_list() - assert call('post') in Session.request.call_args_list() + assert call('get') in Session.request.call_args_list + assert call('post') in Session.request.call_args_list def test_geo_value(self): """test different variants of geo types: single, *, multi."""