diff --git a/AutoDownloadCVPR2019.py b/AutoDownloadCVPR2019.py index 6f1d2c3..cc107b2 100644 --- a/AutoDownloadCVPR2019.py +++ b/AutoDownloadCVPR2019.py @@ -3,6 +3,7 @@ @author: 51takahashi """ +from pathlib import Path import os import requests @@ -13,6 +14,7 @@ def name_check(name): name = name.replace('?','') name = name.replace(':','') name = name.replace('*','') + name = name.replace('\"','') name = name.replace('/',' or ') return name @@ -23,13 +25,21 @@ def main(): txt = r.text; lines = txt.split('\n') cnt = 0 + + existing_files = [f.stem for f in Path(conf).glob('*.pdf')] + for line in lines: if line.find('
')>-1: pdfname = conf+'/'+name_check(line.split('>')[3].split('<')[0])+'.pdf' cnt+=1 + if len(line)>0: if line[0]=='[': print(str(cnt)+':'+pdfname) + + if pdfname.replace('.pdf', '') in existing_files: + continue + if not os.path.exists(pdfname): url = header+line.split('"')[1] r = requests.get(url) @@ -38,4 +48,4 @@ def main(): f.close() if __name__ == '__main__': - main() + main()