6
6
check : run check module
7
7
"""
8
8
9
- __version__ = '1.0.0'
9
+ __version__ = '1.1.0'
10
+
11
+ import base64
12
+ import logging
13
+ import random
14
+ import re
15
+ import sys
16
+ import urllib2
10
17
11
- import logging , re , sys , random , urllib2 , base64
12
- import requests
13
18
import gevent
19
+ import requests
14
20
from gevent import monkey
15
21
16
22
from setting import Config
@@ -51,7 +57,7 @@ def _fetchUrl(self, pageurl, *args, **kwargs):
51
57
else :
52
58
data = None
53
59
54
- if 'dataType ' in kwargs :
60
+ if 'datatype ' in kwargs :
55
61
dataType = kwargs ['datatype' ]
56
62
else :
57
63
dataType = 'html'
@@ -100,13 +106,13 @@ def freeproxylistsHttp(self):
100
106
proxies = []
101
107
u = 'http://www.freeproxylists.com/%s' % mm [0 ]
102
108
html = self ._fetchUrl (u )
103
- us , matches = [], re .findall ('''<a href='%s/d([^']+)\.html'>''' % mm [1 ],html )
109
+ us , matches = [], re .findall ('''<a href='%s/d([^']+)\.html'>''' % mm [1 ], html )
104
110
for g in matches :
105
111
us .append (g )
106
112
for uid in us :
107
- u = 'http://www.freeproxylists.com/load_%s_%s.html' % (mm [1 ],uid )
113
+ u = 'http://www.freeproxylists.com/load_%s_%s.html' % (mm [1 ], uid )
108
114
html = self ._fetchUrl (u )
109
- searchs = re .findall (pattern ,html )
115
+ searchs = re .findall (pattern , html )
110
116
for g in searchs :
111
117
proxies .append (g [0 ] + ':' + g [2 ])
112
118
self .logger .debug (g [0 ] + ':' + g [2 ])
@@ -115,10 +121,11 @@ def freeproxylistsHttp(self):
115
121
116
122
def freeproxylist (self ):
117
123
proxies = []
118
- pattern = re .compile ('<tr><td>(\d+\.\d+\.\d+\.\d+)</td><td>(\d+)</td><td>[^<]+</td><td>[^<]+</td><td>((elite proxy)|(anonymous))</td><td>[^<]+</td><td>[^<]+</td><td>[^<]+</td></tr>' )
124
+ pattern = re .compile (
125
+ '<tr><td>(\d+\.\d+\.\d+\.\d+)</td><td>(\d+)</td><td>[^<]+</td><td>[^<]+</td><td>((elite proxy)|(anonymous))</td><td>[^<]+</td><td>[^<]+</td><td>[^<]+</td></tr>' )
119
126
u = 'http://free-proxy-list.net/'
120
127
html = self ._fetchUrl (u )
121
- #print html
128
+ # print html
122
129
searchs = re .findall (pattern , html )
123
130
for g in searchs :
124
131
proxies .append (g [0 ] + ":" + g [1 ])
@@ -279,7 +286,7 @@ def proxylistsHttp(self):
279
286
proxies .append (ips [0 ] + ":" + g [1 ])
280
287
self .logger .debug (ips [0 ] + ":" + g [1 ])
281
288
uid = uid + 1
282
- if "<a href='{}_{}_ext.html'>{}</a>" .format (dic , uid , uid + 1 ) not in html :
289
+ if "<a href='{}_{}_ext.html'>{}</a>" .format (dic , uid , uid + 1 ) not in html :
283
290
break
284
291
self ._save_proxies (proxies )
285
292
@@ -364,11 +371,11 @@ def proxylist_hidemyass_com(self):
364
371
self .logger .debug (g [0 ][0 ] + ':' + g [0 ][1 ])
365
372
self ._save_proxies (proxies )
366
373
367
- def _validate_proxy (self , ip , port ):
374
+ def _validate_proxy (self , ip , port ):
368
375
url = 'http://p.gkeeps.com/chk.php'
369
376
proxies = {'http' : 'http://{}:{}' .format (ip , port )}
370
- html = self ._fetchUrl (url , proxies = proxies , timeout = 20 )
371
- if html . strip (). startswith ( ip ) :
377
+ j = self ._fetchUrl (url , proxies = proxies , timeout = 20 , datatype = 'json' )
378
+ if isinstance ( j , dict ) and j [ 'level' ] != 'transparent' :
372
379
sql = 'update http set `lastcheck`=CURRENT_TIMESTAMP, `failtimes`=0 ' \
373
380
'where `ip`=%(ip)s and `port`=%(port)s'
374
381
else :
@@ -427,4 +434,4 @@ def check(self):
427
434
monkey .patch_all ()
428
435
thread = gevent .spawn (func )
429
436
thread .start ()
430
- thread .join ()
437
+ thread .join ()
0 commit comments