Skip to content

Commit 71e1d54

Browse files
committed
feat: 实现获取验证码完成
1 parent 00b866d commit 71e1d54

File tree

11 files changed

+137
-0
lines changed

11 files changed

+137
-0
lines changed
Binary file not shown.
Binary file not shown.
File renamed without changes.
Binary file not shown.
Binary file not shown.

demo_project/chaojiying_Python/a.jpg

1.81 KB
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python
2+
# coding:utf-8
3+
4+
import requests
5+
from hashlib import md5
6+
7+
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The account name, the MD5 hex digest of the password and the software ID
    are sent with every request (see ``base_params``).
    """

    # Service endpoints, unchanged from the original sample code.
    # NOTE(review): these are plain-HTTP URLs, so the password digest is sent
    # unencrypted -- confirm whether the service offers an HTTPS endpoint.
    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the account data used to authenticate each request.

        username: account name.
        password: plain-text password; only its MD5 hex digest is kept.
        soft_id: software ID (generated in the service's user centre).
        timeout: seconds to wait for each HTTP request before giving up.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        # Without a timeout, requests waits forever on an unresponsive server.
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, files=None):
        """POST ``params`` merged with the account parameters; return the decoded JSON."""
        payload = dict(params)
        # Account parameters are applied last, as in the original code, so
        # they win over any same-named request-specific key.
        payload.update(self.base_params)
        r = requests.post(url, data=payload, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def PostPic(self, im, codetype):
        """Upload an image for recognition and return the service's JSON reply.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        files = {'userfile': ('ccc.jpg', im)}
        return self._post(self.UPLOAD_URL, {'codetype': codetype}, files=files)

    def PostPic_base64(self, base64_str, codetype):
        """Submit a base64-encoded image for recognition and return the JSON reply.

        base64_str: the image as base64 text (sent as the 'file_base64' field).
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
            'file_base64': base64_str,
        }
        return self._post(self.UPLOAD_URL, params)

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON reply.

        im_id: picture ID of the captcha being reported.
        """
        return self._post(self.REPORT_URL, {'id': im_id})
60+
61+
62+
if __name__ == '__main__':
    # Fill in account name, password and software ID
    # (user centre >> software ID: generate one to replace 96001).
    chaojiying = Chaojiying_Client('', '', '')
    # Path of the local image file; replace a.jpg as needed
    # (on Windows the path sometimes needs // separators).
    # The context manager closes the file even if reading fails.
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # 1004 is the captcha type code (official site >> price list).
    # On Python 3.4+ print must be called with parentheses.
    print(chaojiying.PostPic(im, 1004)['pic_str'])
    # To submit base64 text instead, use PostPic_base64(base64_str, codetype).

demo_project/getSoGuShiWenData.py

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# 开始实现我们的爬取一个 GuShiWen 网站的数据
2+
import os
3+
import requests
4+
from lxml import etree
5+
from chaojiying_Python.chaojiying import Chaojiying_Client
6+
7+
8+
class GetGuShiWenData(object):
    """Crawler for gushiwen.cn.

    At this stage it downloads the login-page captcha, saves it to disk and
    sends it to the Chaojiying service for recognition; parsing and saving
    of site data are still stubs.
    """

    # Seconds to wait on each HTTP request; without a timeout requests can
    # block forever on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.index_url = "https://www.gushiwen.cn/user/login.aspx?from=http://www.gushiwen.cn/user/collect.aspx"
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537."
                          "36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
        }

    def __del__(self):
        """Print a completion message when the crawler object is destroyed."""
        print("此次爬虫任务完毕...")

    def get_image_url(self):
        """Fetch the login page and build the URL of its captcha image.

        :return: f"https://so.gushiwen.cn{src}", where src is the ``src``
            attribute of the ``img`` element with id ``imgCode``
        :raises IndexError: if the page contains no such element
        """
        response_html = requests.get(
            self.index_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        ).text
        sources = etree.HTML(response_html).xpath('//img[@id="imgCode"]/@src')
        if not sources:
            # Same exception type as the bare [0] lookup, but with a message
            # that says what was actually missing.
            raise IndexError("captcha image (img#imgCode) not found on the login page")
        return f"https://so.gushiwen.cn{sources[0]}"

    @staticmethod
    def _write_image(img_path, img_data):
        """Write ``img_data`` to ``img_path``, replacing any previous content."""
        directory = os.path.dirname(img_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        # "wb" truncates an existing file. The previous "r+b" mode overwrote
        # in place, so a shorter image kept trailing bytes of the old one.
        with open(img_path, "wb") as f:
            f.write(img_data)

    def save_image_data(self, url):
        """Download the image at ``url`` and save it to disk.

        :param url: URL of the captcha image
        :return: img_path, the path the image was written to
        """
        img_data = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        ).content
        img_path = "../get_image/parse_guzhiwen.jpg"
        self._write_image(img_path, img_data)
        return img_path

    def get_image_data(self, path):
        """Send the saved captcha image to Chaojiying and return the recognised text.

        Credentials are read from the environment variables
        CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD and CHAOJIYING_SOFT_ID when
        they are set.

        :param path: path of the captcha image on disk
        :return: the 'pic_str' field of the PostPic(img, 1004) response
        """
        # NOTE(review): the fallback values below are real credentials that
        # were committed to source control. They are kept only so existing
        # behaviour is unchanged -- rotate them and drop the defaults.
        ChaoJiYing = Chaojiying_Client(
            os.environ.get("CHAOJIYING_USERNAME", "juwenzhang"),
            os.environ.get("CHAOJIYING_PASSWORD", "451674jh"),
            os.environ.get("CHAOJIYING_SOFT_ID", '958562'),
        )
        # Context manager so the file handle is closed promptly.
        with open(path, "rb") as image_file:
            img = image_file.read()
        return ChaoJiYing.PostPic(img, 1004)['pic_str']

    def parse_data(self):
        """Placeholder: parsing of crawled data is not implemented yet."""
        pass

    def save_dara(self):
        """Placeholder: saving of crawled data is not implemented yet.

        NOTE(review): the name looks like a typo for ``save_data``; it is kept
        unchanged so existing callers keep working.
        """
        pass

    def run(self):
        """Run the captcha pipeline: locate, download and recognise the image.

        :return: the recognised captcha text (previously computed but discarded)
        """
        img_url = self.get_image_url()
        img_path = self.save_image_data(img_url)
        return self.get_image_data(img_path)
69+
if __name__ == "__main__":
    # Script entry point: build the crawler and run the captcha pipeline once.
    crawler = GetGuShiWenData()
    crawler.run()

get_image/__init__.py

Whitespace-only changes.

get_image/parse_guzhiwen.jpg

1.42 KB
Loading

images/image01.png

-54.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)