|
2 | 2 | from itertools import chain
|
3 | 3 | from typing import Iterable, Iterator, TypeVar, Generic, Type, Callable
|
4 | 4 | from uuid import uuid5
|
| 5 | +from warnings import warn |
5 | 6 |
|
6 | 7 | from click import echo
|
7 | 8 | from elasticsearch_dsl import Search
|
8 | 9 | from elasticsearch_dsl.function import RandomScore
|
9 | 10 | from elasticsearch_dsl.query import Exists, FunctionScore, Term, RankFeature
|
| 11 | +from requests import ConnectionError as RequestsConnectionError |
10 | 12 | from tqdm.auto import tqdm
|
11 | 13 | from warc_s3 import WarcS3Record
|
12 | 14 | from warcio.recordloader import ArcWarcRecord
|
@@ -71,11 +73,18 @@ def _download_serp_warc(
|
71 | 73 | api_url=serp.archive.memento_api_url,
|
72 | 74 | session=config.http.session,
|
73 | 75 | )
|
74 |
| - records = memento_api.load_url_warc( |
75 |
| - url=serp.capture.url, |
76 |
| - timestamp=serp.capture.timestamp, |
77 |
| - raw=True, |
78 |
| - ) |
| 76 | + try: |
| 77 | + records = memento_api.load_url_warc( |
| 78 | + url=serp.capture.url, |
| 79 | + timestamp=serp.capture.timestamp, |
| 80 | + raw=True, |
| 81 | + ) |
| 82 | + except RequestsConnectionError: |
| 83 | + warn(RuntimeWarning( |
| 84 | + f"Connection error while downloading WARC " |
| 85 | + f"for capture URL {serp.capture.url} at {serp.capture.timestamp}." |
| 86 | + )) |
| 87 | + return |
79 | 88 | for record in records:
|
80 | 89 | yield _SerpArcWarcRecord(serp, record)
|
81 | 90 |
|
|
0 commit comments