Skip to content

Commit 05af9d0

Browse files
committed
improve the validate method
1 parent 0d09bb8 commit 05af9d0

File tree

3 files changed

+45
-20
lines changed

3 files changed

+45
-20
lines changed

controller.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
#!/usr/bin/env python
22
# coding:utf-8
33

4-
import urllib, random
5-
from bottle import get, post, route, redirect, static_file
6-
from bottle import request, response, jinja2_template
4+
from bottle import get, route, static_file
5+
from bottle import request, jinja2_template
76

87
from setting import Config
98
from storager import MysqlStorager
@@ -41,5 +40,23 @@ def home():
4140
def chk():
4241
# it was replaced by nginx
4342
x_real_ip = request.environ.get('HTTP_X_REAL_IP')
43+
via = request.environ.get('HTTP_VIA')
4444
x_forward_for = request.environ.get('HTTP_X_FORWARDED_FOR')
45-
return '{}\n{}'.format(x_forward_for, x_real_ip)
45+
user_agent = request.environ.get('HTTP_USER_AGENT')
46+
accept_language = request.environ.get('HTTP_ACCEPT_LANGUAGE')
47+
48+
result = {'remote_addr': x_real_ip, 'user_agent': user_agent, 'accept_language': accept_language}
49+
if via:
50+
result['via'] = via
51+
if x_forward_for:
52+
result['x_forward_for'] = x_forward_for
53+
54+
ip = ','.join([x for x in (via, x_forward_for) if x])
55+
level = 'high anonymous'
56+
if ip:
57+
level = 'anonymous'
58+
if ip != x_real_ip:
59+
level = 'transparent'
60+
result['level'] = level
61+
62+
return result

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
gevent
1+
#gevent
22
requests
33
bottle
4-
beaker
4+
beaker
5+
PyYAML

service.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,17 @@
66
check : run check module
77
"""
88

9-
__version__ = '1.0.0'
9+
__version__ = '1.1.0'
10+
11+
import base64
12+
import logging
13+
import random
14+
import re
15+
import sys
16+
import urllib2
1017

11-
import logging, re, sys, random, urllib2, base64
12-
import requests
1318
import gevent
19+
import requests
1420
from gevent import monkey
1521

1622
from setting import Config
@@ -51,7 +57,7 @@ def _fetchUrl(self, pageurl, *args, **kwargs):
5157
else:
5258
data = None
5359

54-
if 'dataType' in kwargs:
60+
if 'datatype' in kwargs:
5561
dataType = kwargs['datatype']
5662
else:
5763
dataType = 'html'
@@ -100,13 +106,13 @@ def freeproxylistsHttp(self):
100106
proxies = []
101107
u = 'http://www.freeproxylists.com/%s' % mm[0]
102108
html = self._fetchUrl(u)
103-
us, matches = [], re.findall('''<a href='%s/d([^']+)\.html'>''' % mm[1],html)
109+
us, matches = [], re.findall('''<a href='%s/d([^']+)\.html'>''' % mm[1], html)
104110
for g in matches:
105111
us.append(g)
106112
for uid in us:
107-
u = 'http://www.freeproxylists.com/load_%s_%s.html' % (mm[1],uid)
113+
u = 'http://www.freeproxylists.com/load_%s_%s.html' % (mm[1], uid)
108114
html = self._fetchUrl(u)
109-
searchs = re.findall(pattern,html)
115+
searchs = re.findall(pattern, html)
110116
for g in searchs:
111117
proxies.append(g[0] + ':' + g[2])
112118
self.logger.debug(g[0] + ':' + g[2])
@@ -115,10 +121,11 @@ def freeproxylistsHttp(self):
115121

116122
def freeproxylist(self):
117123
proxies = []
118-
pattern = re.compile('<tr><td>(\d+\.\d+\.\d+\.\d+)</td><td>(\d+)</td><td>[^<]+</td><td>[^<]+</td><td>((elite proxy)|(anonymous))</td><td>[^<]+</td><td>[^<]+</td><td>[^<]+</td></tr>')
124+
pattern = re.compile(
125+
'<tr><td>(\d+\.\d+\.\d+\.\d+)</td><td>(\d+)</td><td>[^<]+</td><td>[^<]+</td><td>((elite proxy)|(anonymous))</td><td>[^<]+</td><td>[^<]+</td><td>[^<]+</td></tr>')
119126
u = 'http://free-proxy-list.net/'
120127
html = self._fetchUrl(u)
121-
#print html
128+
# print html
122129
searchs = re.findall(pattern, html)
123130
for g in searchs:
124131
proxies.append(g[0] + ":" + g[1])
@@ -279,7 +286,7 @@ def proxylistsHttp(self):
279286
proxies.append(ips[0] + ":" + g[1])
280287
self.logger.debug(ips[0] + ":" + g[1])
281288
uid = uid + 1
282-
if "<a href='{}_{}_ext.html'>{}</a>".format(dic, uid, uid+1) not in html:
289+
if "<a href='{}_{}_ext.html'>{}</a>".format(dic, uid, uid + 1) not in html:
283290
break
284291
self._save_proxies(proxies)
285292

@@ -364,11 +371,11 @@ def proxylist_hidemyass_com(self):
364371
self.logger.debug(g[0][0] + ':' + g[0][1])
365372
self._save_proxies(proxies)
366373

367-
def _validate_proxy(self, ip , port):
374+
def _validate_proxy(self, ip, port):
368375
url = 'http://p.gkeeps.com/chk.php'
369376
proxies = {'http': 'http://{}:{}'.format(ip, port)}
370-
html = self._fetchUrl(url, proxies=proxies, timeout=20)
371-
if html.strip().startswith(ip):
377+
j = self._fetchUrl(url, proxies=proxies, timeout=20, datatype='json')
378+
if isinstance(j, dict) and j['level'] != 'transparent':
372379
sql = 'update http set `lastcheck`=CURRENT_TIMESTAMP, `failtimes`=0 ' \
373380
'where `ip`=%(ip)s and `port`=%(port)s'
374381
else:
@@ -427,4 +434,4 @@ def check(self):
427434
monkey.patch_all()
428435
thread = gevent.spawn(func)
429436
thread.start()
430-
thread.join()
437+
thread.join()

0 commit comments

Comments
 (0)