18
18
from tornado .httputil import HTTPHeaders
19
19
from tornado .escape import native_str
20
20
21
- try :
22
- import pycurl
23
- except ImportError as e :
24
- print (e )
25
- pycurl = None
26
21
from jinja2 .sandbox import SandboxedEnvironment as Environment
27
- from tornado import gen , httpclient
22
+ from tornado import gen , httpclient , simple_httpclient
28
23
29
24
import config
30
25
from libs import cookie_utils , utils
31
26
27
+ if config .use_pycurl :
28
+ try :
29
+ import pycurl
30
+ except ImportError as e :
31
+ print (e )
32
+ pycurl = None
33
+ else :
34
+ pycurl = None
35
+ NOT_RETYR_CODE = config .not_retry_code
32
36
logger = logging .getLogger ('qiandao.fetcher' )
33
37
34
-
35
38
class Fetcher (object ):
36
39
def __init__ (self , download_size_limit = config .download_size_limit ):
37
40
if pycurl :
@@ -416,25 +419,29 @@ def build_request(en):
416
419
version = '1.2'
417
420
)
418
421
)
419
- async def build_response (self , obj , proxy = {}, CURL_ENCODING = True , CURL_CONTENT_LENGTH = True ):
422
+ async def build_response (self , obj , proxy = {}, CURL_ENCODING = config . curl_encoding , CURL_CONTENT_LENGTH = config . curl_length , EMPTY_RETRY = config . empty_retry ):
420
423
try :
421
424
req , rule , env = self .build_request (obj , download_size_limit = self .download_size_limit ,proxy = proxy ,CURL_ENCODING = CURL_ENCODING ,CURL_CONTENT_LENGTH = CURL_CONTENT_LENGTH )
422
425
response = await gen .convert_yielded (self .client .fetch (req ))
423
426
except httpclient .HTTPError as e :
424
427
try :
425
- if e .__dict__ .get ('errno' ,'' ) == 61 :
426
- req , rule , env = self .build_request (obj , download_size_limit = self .download_size_limit ,proxy = proxy ,CURL_ENCODING = False )
427
- e .response = await gen .convert_yielded (self .client .fetch (req ))
428
- elif e .code == 400 and e .message == 'Bad Request' and not e .response :
429
- if req and req .headers .get ('content-length' ):
430
- req , rule , env = self .build_request (obj , download_size_limit = self .download_size_limit ,proxy = proxy ,CURL_CONTENT_LENGTH = False )
428
+ if config .allow_retry and pycurl :
429
+ if e .__dict__ .get ('errno' ,'' ) == 61 :
430
+ logger .warning ('{} {} [Warning] {} -> Try to retry!' .format (req .method ,req .url ,e ))
431
+ req , rule , env = self .build_request (obj , download_size_limit = self .download_size_limit ,proxy = proxy ,CURL_ENCODING = False ,CURL_CONTENT_LENGTH = CURL_CONTENT_LENGTH )
431
432
e .response = await gen .convert_yielded (self .client .fetch (req ))
432
- else :
433
- httpclient . AsyncHTTPClient . configure ( None )
434
- req , rule , env = self .build_request (obj , download_size_limit = self .download_size_limit ,proxy = proxy )
433
+ elif e . code == 400 and e . message == 'Bad Request' and req and req . headers . get ( 'content-length' ) :
434
+ logger . warning ( '{} {} [Warning] {} -> Try to retry!' . format ( req . method , req . url , e ) )
435
+ req , rule , env = self .build_request (obj , download_size_limit = self .download_size_limit ,proxy = proxy , CURL_ENCODING = CURL_ENCODING , CURL_CONTENT_LENGTH = False )
435
436
e .response = await gen .convert_yielded (self .client .fetch (req ))
436
- if pycurl :
437
- httpclient .AsyncHTTPClient .configure ('tornado.curl_httpclient.CurlAsyncHTTPClient' )
437
+ elif e .code not in NOT_RETYR_CODE or (EMPTY_RETRY and not e .response ):
438
+ logger .warning ('{} {} [Warning] {} -> Try to retry!' .format (req .method ,req .url ,e ))
439
+ client = simple_httpclient .SimpleAsyncHTTPClient ()
440
+ e .response = await gen .convert_yielded (client .fetch (req ))
441
+ else :
442
+ logger .warning ('{} {} [Warning] {}' .format (req .method ,req .url ,e ))
443
+ else :
444
+ logger .warning ('{} {} [Warning] {}' .format (req .method ,req .url ,e ))
438
445
finally :
439
446
if not e .response :
440
447
traceback .print_exc ()
@@ -443,7 +450,7 @@ async def build_response(self, obj, proxy={}, CURL_ENCODING=True, CURL_CONTENT_L
443
450
return rule , env , e .response
444
451
return rule , env , response
445
452
446
- async def fetch (self , obj , proxy = {}, CURL_ENCODING = True , CURL_CONTENT_LENGTH = True ):
453
+ async def fetch (self , obj , proxy = {}, CURL_ENCODING = config . curl_encoding , CURL_CONTENT_LENGTH = config . curl_length , EMPTY_RETRY = config . empty_retry ):
447
454
"""
448
455
obj = {
449
456
request: {
@@ -468,7 +475,7 @@ async def fetch(self, obj, proxy={}, CURL_ENCODING=True, CURL_CONTENT_LENGTH=Tru
468
475
}
469
476
"""
470
477
471
- rule , env , response = await gen .convert_yielded (self .build_response (obj , proxy , CURL_ENCODING , CURL_CONTENT_LENGTH ))
478
+ rule , env , response = await gen .convert_yielded (self .build_response (obj , proxy , CURL_ENCODING , CURL_CONTENT_LENGTH , EMPTY_RETRY ))
472
479
473
480
env ['session' ].extract_cookies_to_jar (response .request , response )
474
481
success , msg = self .run_rule (response , rule , env )
0 commit comments