Skip to content

Commit f73fad1

Browse files
committed
FIX: WARC-Simple-Form-Province-Status is ignored if HTML does not have
META ROBOTS tag.
1 parent 67ebc07 commit f73fad1

File tree

3 files changed

+1
-6
lines changed

3 files changed

+1
-6
lines changed

cdx_writer.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,6 @@ def get_AIF_meta_tags(self, record):
         """
         x_robots_tag = self.parse_http_header('x-robots-tag')
 
-        if x_robots_tag is None:
-            if not self.meta_tags:
-                return '-'
-            if 'robots' not in self.meta_tags:
-                return '-'
-
         robot_tags = []
         if self.meta_tags and 'robots' in self.meta_tags:
             robot_tags += self.meta_tags['robots'].split(',')
3.41 KB
Binary file not shown.

tests/test_small_warcs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
     'meta_tag_large.warc.gz': 'com,richmondstrikers)/alumni/index_e.html 20131104222633 http://www.richmondstrikers.com/Alumni/index_E.html text/html 200 U7ST7H2CUXSLGCOYR3KT5POAPZI3KY7B - - 21796 0 meta_tag_large.warc.gz', #from NO404-WKP-20131104215558-crawl345/NO404-WKP-20131104222227-08103.warc.gz
     'wget_ia.warc.gz': 'org,archive)/ 20140314173216 https://archive.org/ text/html 200 ZSSZNM66RWQWZ7FMNEP2XEAORAULQHMY - - 6891 414 wget_ia.warc.gz', #created with wget 1.14
     'password-protected.warc.gz': 'com,facebook)/login.php?login_attempt=1 20140331214328 https://www.facebook.com/login.php?login_attempt=1 text/html 200 I45Q4NNB7NJJQ6CULNCSXQBAYSDTYB7U - P 7147 0 password-protected.warc.gz', #from ARCHIVEIT-3007-NONE-10537-20140331214316874-00000-desktop-nlevitt.sf.archive.org-6440.warc.gz
+    'password-protected-no-meta.warc.gz':'au,edu,unimelb,youngscholars)/ 20130813000800 http://youngscholars.unimelb.edu.au/ text/html 200 KPN526ULL7I6IELN2N6QWQUE2RPS335P - P 3494 0 password-protected-no-meta.warc.gz'
 }
 
 
 for file, cdx in warcs.iteritems():

0 commit comments

Comments
 (0)