10
10
11
11
12
12
class DL :
13
- def __init__ (self , tr , output_path , filename_fmt , since_timestamp = 0 ):
13
+ def __init__ (self , tr , output_path , filename_fmt , since_timestamp = 0 , history_file = 'pytr_history' ):
14
14
'''
15
15
tr: api object
16
16
output_path: name of the directory where the downloaded files are saved
@@ -19,6 +19,7 @@ def __init__(self, tr, output_path, filename_fmt, since_timestamp=0):
19
19
'''
20
20
self .tr = tr
21
21
self .output_path = Path (output_path )
22
+ self .history_file = self .output_path / history_file
22
23
self .filename_fmt = filename_fmt
23
24
self .since_timestamp = since_timestamp
24
25
@@ -29,8 +30,20 @@ def __init__(self, tr, output_path, filename_fmt, since_timestamp=0):
29
30
self .done = 0
30
31
self .filepaths = []
31
32
self .doc_urls = []
33
+ self .doc_urls_history = []
32
34
self .tl = Timeline (self .tr )
33
35
self .log = get_logger (__name__ )
36
+ self .load_history ()
37
+
38
def load_history(self):
    '''
    Load the list of already downloaded document URLs from the history file.

    Reads self.history_file (one entry per line) into self.doc_urls_history.
    If the file does not exist, it is created empty, together with any
    missing parent directories, and self.doc_urls_history is left unchanged.
    '''
    try:
        # Open directly instead of checking exists() first: a file that
        # vanishes between the check and the open would otherwise raise an
        # unhandled FileNotFoundError.
        # The platform default encoding is kept on purpose so that reading
        # matches how work_responses appends to this file.
        with self.history_file.open() as f:
            self.doc_urls_history = f.read().splitlines()
    except FileNotFoundError:
        # Either the file or one of its parent directories is missing.
        self.history_file.parent.mkdir(exist_ok=True, parents=True)
        self.history_file.touch()
        self.log.info('Created history file')
    else:
        self.log.info(f'Found {len(self.doc_urls_history)} lines in history file')
34
47
35
48
async def dl_loop (self ):
36
49
await self .tl .get_next_timeline (max_age_timestamp = self .since_timestamp )
@@ -102,12 +115,17 @@ def dl_doc(self, doc, titleText, subtitleText, subfolder=None):
102
115
if doc_url_base in self .doc_urls :
103
116
self .log .debug (f'URL { doc_url_base } already in queue. Skipping...' )
104
117
return
118
+ elif doc_url_base in self .doc_urls_history :
119
+ self .log .debug (f'URL { doc_url_base } already in history. Skipping...' )
120
+ return
105
121
else :
106
122
self .doc_urls .append (doc_url_base )
107
123
108
124
future = self .session .get (doc_url )
109
125
future .filepath = filepath
126
+ future .doc_url_base = doc_url_base
110
127
self .futures .append (future )
128
+ self .log .debug (f'Added { filepath } to queue' )
111
129
else :
112
130
self .log .debug (f'file { filepath } already exists. Skipping...' )
113
131
@@ -119,24 +137,21 @@ def work_responses(self):
119
137
self .log .info ('Nothing to download' )
120
138
exit (0 )
121
139
122
- self .log .info ('Waiting for downloads to complete..' )
123
- for future in as_completed (self .futures ):
124
- if future .filepath .is_file () is True :
125
- self .log .debug (f'file { future .filepath } was already downloaded.' )
140
+ with self .history_file .open ('a' ) as history_file :
141
+ self .log .info ('Waiting for downloads to complete..' )
142
+ for future in as_completed (self .futures ):
143
+ if future .filepath .is_file () is True :
144
+ self .log .debug (f'file { future .filepath } was already downloaded.' )
126
145
127
- r = future .result ()
128
- future .filepath .parent .mkdir (parents = True , exist_ok = True )
129
- with open (future .filepath , 'wb' ) as f :
130
- f .write (r .content )
131
- self .done += 1
146
+ r = future .result ()
147
+ future .filepath .parent .mkdir (parents = True , exist_ok = True )
148
+ with open (future .filepath , 'wb' ) as f :
149
+ f .write (r .content )
150
+ self .done += 1
151
+ history_file .write (f'{ future .doc_url_base } \n ' )
132
152
133
- self .log .debug (f'{ self .done :>3} /{ len (self .doc_urls )} { future .filepath .name } ' )
153
+ self .log .debug (f'{ self .done :>3} /{ len (self .doc_urls )} { future .filepath .name } ' )
134
154
135
- if self .done == len (self .doc_urls ):
136
- self .log .info ('Done.' )
137
- exit (0 )
138
-
139
- def dl_all (output_path ):
140
- '''
141
- TODO
142
- '''
155
+ if self .done == len (self .doc_urls ):
156
+ self .log .info ('Done.' )
157
+ exit (0 )
0 commit comments