Skip to content

Commit 042ea58

Browse files
committed
fetcher重试逻辑支持config配置
1 parent 84f84fa commit 042ea58

File tree

3 files changed

+50
-28
lines changed

3 files changed

+50
-28
lines changed

README.md

+9-3
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@
4141

4242
8. **Docker已预装Curl环境, 默认安装pycurl模组**
4343

44-
```
44+
```bash
4545
# 如需使用Proxy功能请安装PyCurl
4646
# Windows源码运行, 请执行 pip install pycurl==7.43.0.5
4747
pip install pycurl # pip3 install pycurl
48-
# 如因curl导致500或599错误, 请卸载PyCurl
48+
# 如因curl导致500或599错误, 请卸载PyCurl或修改环境变量USE_PYCURL为False
4949
# pip uninstall pycurl
5050
```
5151

@@ -118,6 +118,12 @@ COOKIE_SECRET|否|binux|cookie加密密钥, 强烈建议修改
118118
PROXIES|否|""|全局代理域名列表,用"|"分隔
119119
PROXY_DIRECT_MODE|否|""|全局代理黑名单模式,默认不开启 <br>"url"为网址匹配模式;"regexp"为正则表达式匹配模式
120120
PROXY_DIRECT|否|""|全局代理黑名单匹配规则
121+
USE_PYCURL|否|True|是否启用Pycurl模组
122+
ALLOW_RETRY|否|True|在Pycurl环境下, 当部分请求导致Request错误时, <br>自动修改冲突设置并重发请求
123+
CURL_ENCODING|否|True|是否允许使用Curl进行Encoding操作
124+
CURL_CONTENT_LENGTH|否|True|是否允许Curl使用Headers中自定义Content-Length请求
125+
NOT_RETRY_CODE|否|[详见配置](config.py)...|[详见配置](config.py)...
126+
EMPTY_RETRY|否|True|[详见配置](config.py)...
121127

122128
> 详细信息请查阅[config.py](config.py)
123129
@@ -146,7 +152,7 @@ sh /usr/src/app/update.sh # 先进入容器后台, 执行命令后重启进程
146152
=========
147153
## 2021.09.28 更新
148154
1. 更新并优化fetcher脚本
149-
2. 更新输入提示
155+
2. 更新输入提示(by [cxk000](https://github.com/a76yyyy/qiandao/commits?author=ckx000))
150156
3. 修复{{unicode(arg)}}不能正常转换unicode的bug
151157
4. 修复重复添加'|urlencode'的bug
152158
5. 优化fetcher重试逻辑

config.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,24 @@
99
import hashlib
1010
from urllib.parse import urlparse
1111

12-
debug = False # 是否开启Debug
13-
gzip = True # 是否启用gzip
14-
bind = str(os.getenv('BIND', '0.0.0.0')) # 框架运行监听地址(0.0.0.0表示监听所有IP地址)
12+
debug = bool(os.getenv('QIANDAO_DEBUG',False)) # 是否开启Debug
13+
gzip = bool(os.getenv('GZIP',True)) # 是否启用gzip
14+
bind = str(os.getenv('BIND', '0.0.0.0')) # 框架运行监听地址(0.0.0.0表示监听所有IP地址)
1515
port = int(os.getenv('PORT', 8923)) # 监听端口Port
16-
https = bool(os.getenv('ENABLE_HTTPS', False)) # 发送的邮件链接启用HTTPS, 非程序使用HTTPS, 需要HTTPS需要使用反向代理
16+
https = bool(os.getenv('ENABLE_HTTPS', False)) # 发送的邮件链接启用HTTPS, 非框架自身HTTPS开关, 需要HTTPS请使用外部反向代理
1717
cookie_days = 5 # Cookie在客户端保留时间
1818
mysql_url = urlparse(os.getenv('JAWSDB_MARIA_URL', '')) # 格式: mysql://用户名:密码@hostname:port/数据库名
1919
redis_url = urlparse(os.getenv('REDISCLOUD_URL', '')) # 格式: (redis/http)://rediscloud:密码@hostname:port
2020

21+
# PyCurl 相关设置
22+
use_pycurl = bool(os.getenv('USE_PYCURL',True)) # 是否启用Pycurl模组, 当环境无PyCurl模组时无效
23+
allow_retry = bool(os.getenv('ALLOW_RETRY', True)) # 在Pycurl环境下部分请求可能导致Request错误时, 自动修改冲突设置并重发请求
24+
curl_encoding = bool(os.getenv('CURL_ENCODING', True)) # 是否允许使用Curl进行Encoding操作, 当PyCurl返回"Error 61 Unrecognized transfer encoding."错误且'ALLOW_RETRY=True'时, 本次请求禁用Headers中的Content-Encoding并重试
25+
curl_length = bool(os.getenv('CURL_CONTENT_LENGTH', True)) # 是否允许Curl使用Headers中自定义Content-Length请求, 当PyCurl返回"HTTP 400 Bad Request"错误且'ALLOW_RETRY=True'时, 本次请求禁用Headers中的Content-Length并重试
26+
not_retry_code = list(map(int,os.getenv('NOT_RETRY_CODE', '301|302|303|304|305|307|400|401|403|404|405|407|408|409|410|412|415|413|414|500|501|502|503|504|599').split('|')))
27+
# 启用后, 当满足PyCurl启用, HTTPError code不在该列表中, 任务代理为空, 且'ALLOW_RETRY=True'时, 本次请求禁用Pycurl并重试
28+
empty_retry = bool(os.getenv('EMPTY_RETRY', True)) # 启用后, 当满足PyCurl启用, 返回Response为空, 任务代理为空, 且'ALLOW_RETRY=True'时, 本次请求禁用Pycurl并重试
29+
2130
class mysql(object):
2231
host = mysql_url.hostname or 'localhost' # 访问MySQL的Hostname
2332
port = mysql_url.port or '3306' # MySQL的端口Port

libs/fetcher.py

+28-21
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,23 @@
1818
from tornado.httputil import HTTPHeaders
1919
from tornado.escape import native_str
2020

21-
try:
22-
import pycurl
23-
except ImportError as e:
24-
print(e)
25-
pycurl = None
2621
from jinja2.sandbox import SandboxedEnvironment as Environment
27-
from tornado import gen, httpclient
22+
from tornado import gen, httpclient, simple_httpclient
2823

2924
import config
3025
from libs import cookie_utils, utils
3126

27+
if config.use_pycurl:
28+
try:
29+
import pycurl
30+
except ImportError as e:
31+
print(e)
32+
pycurl = None
33+
else:
34+
pycurl = None
35+
NOT_RETYR_CODE = config.not_retry_code
3236
logger = logging.getLogger('qiandao.fetcher')
3337

34-
3538
class Fetcher(object):
3639
def __init__(self, download_size_limit=config.download_size_limit):
3740
if pycurl:
@@ -416,25 +419,29 @@ def build_request(en):
416419
version = '1.2'
417420
)
418421
)
419-
async def build_response(self, obj, proxy={}, CURL_ENCODING=True, CURL_CONTENT_LENGTH=True):
422+
async def build_response(self, obj, proxy={}, CURL_ENCODING=config.curl_encoding, CURL_CONTENT_LENGTH=config.curl_length, EMPTY_RETRY = config.empty_retry):
420423
try:
421424
req, rule, env = self.build_request(obj, download_size_limit=self.download_size_limit,proxy=proxy,CURL_ENCODING=CURL_ENCODING,CURL_CONTENT_LENGTH=CURL_CONTENT_LENGTH)
422425
response = await gen.convert_yielded(self.client.fetch(req))
423426
except httpclient.HTTPError as e:
424427
try:
425-
if e.__dict__.get('errno','') == 61:
426-
req, rule, env = self.build_request(obj, download_size_limit=self.download_size_limit,proxy=proxy,CURL_ENCODING=False)
427-
e.response = await gen.convert_yielded(self.client.fetch(req))
428-
elif e.code == 400 and e.message == 'Bad Request' and not e.response:
429-
if req and req.headers.get('content-length'):
430-
req, rule, env = self.build_request(obj, download_size_limit=self.download_size_limit,proxy=proxy,CURL_CONTENT_LENGTH=False)
428+
if config.allow_retry and pycurl:
429+
if e.__dict__.get('errno','') == 61:
430+
logger.warning('{} {} [Warning] {} -> Try to retry!'.format(req.method,req.url,e))
431+
req, rule, env = self.build_request(obj, download_size_limit=self.download_size_limit,proxy=proxy,CURL_ENCODING=False,CURL_CONTENT_LENGTH=CURL_CONTENT_LENGTH)
431432
e.response = await gen.convert_yielded(self.client.fetch(req))
432-
else:
433-
httpclient.AsyncHTTPClient.configure(None)
434-
req, rule, env = self.build_request(obj, download_size_limit=self.download_size_limit,proxy=proxy)
433+
elif e.code == 400 and e.message == 'Bad Request' and req and req.headers.get('content-length'):
434+
logger.warning('{} {} [Warning] {} -> Try to retry!'.format(req.method,req.url,e))
435+
req, rule, env = self.build_request(obj, download_size_limit=self.download_size_limit,proxy=proxy,CURL_ENCODING=CURL_ENCODING,CURL_CONTENT_LENGTH=False)
435436
e.response = await gen.convert_yielded(self.client.fetch(req))
436-
if pycurl:
437-
httpclient.AsyncHTTPClient.configure('tornado.curl_httpclient.CurlAsyncHTTPClient')
437+
elif e.code not in NOT_RETYR_CODE or (EMPTY_RETRY and not e.response):
438+
logger.warning('{} {} [Warning] {} -> Try to retry!'.format(req.method,req.url,e))
439+
client = simple_httpclient.SimpleAsyncHTTPClient()
440+
e.response = await gen.convert_yielded(client.fetch(req))
441+
else:
442+
logger.warning('{} {} [Warning] {}'.format(req.method,req.url,e))
443+
else:
444+
logger.warning('{} {} [Warning] {}'.format(req.method,req.url,e))
438445
finally:
439446
if not e.response:
440447
traceback.print_exc()
@@ -443,7 +450,7 @@ async def build_response(self, obj, proxy={}, CURL_ENCODING=True, CURL_CONTENT_L
443450
return rule, env, e.response
444451
return rule, env, response
445452

446-
async def fetch(self, obj, proxy={}, CURL_ENCODING=True, CURL_CONTENT_LENGTH=True):
453+
async def fetch(self, obj, proxy={}, CURL_ENCODING=config.curl_encoding, CURL_CONTENT_LENGTH=config.curl_length, EMPTY_RETRY = config.empty_retry):
447454
"""
448455
obj = {
449456
request: {
@@ -468,7 +475,7 @@ async def fetch(self, obj, proxy={}, CURL_ENCODING=True, CURL_CONTENT_LENGTH=Tru
468475
}
469476
"""
470477

471-
rule, env, response = await gen.convert_yielded(self.build_response(obj, proxy, CURL_ENCODING, CURL_CONTENT_LENGTH))
478+
rule, env, response = await gen.convert_yielded(self.build_response(obj, proxy, CURL_ENCODING, CURL_CONTENT_LENGTH, EMPTY_RETRY))
472479

473480
env['session'].extract_cookies_to_jar(response.request, response)
474481
success, msg = self.run_rule(response, rule, env)

0 commit comments

Comments
 (0)