Skip to content

Commit 4ca1b33

Browse files
author
2knal
committed
Basic setup
`use_cache=False` option completed. Dependencies and directory restructured.
0 parents  commit 4ca1b33

21 files changed

+948
-0
lines changed

.gitignore

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
98+
__pypackages__/
99+
100+
# Celery stuff
101+
celerybeat-schedule
102+
celerybeat.pid
103+
104+
# SageMath parsed files
105+
*.sage.py
106+
107+
# Environments
108+
.env
109+
.venv
110+
env/
111+
venv/
112+
ENV/
113+
env.bak/
114+
venv.bak/
115+
116+
# Spyder project settings
117+
.spyderproject
118+
.spyproject
119+
120+
# Rope project settings
121+
.ropeproject
122+
123+
# mkdocs documentation
124+
/site
125+
126+
# mypy
127+
.mypy_cache/
128+
.dmypy.json
129+
dmypy.json
130+
131+
# Pyre type checker
132+
.pyre/
133+
134+
# pytype static type analyzer
135+
.pytype/
136+
137+
# Cython debug symbols
138+
cython_debug/

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 Kunal D. Sonawane
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# random_proxies
2+
Python package to generate random proxies on the fly!

proxies/__init__.py

Whitespace-only changes.

proxies/cache_server/README.md

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Cache Server
2+
3+
## Index structures
4+
5+
> Note: type for all the indices will be `proxy`
6+
7+
- <b>proxies</b>: Dump of all the proxies from `main/routine.py`
8+
- <b>recents</b>: Any proxy fetched with the `use_cache=True` parameter is added here
9+
10+
### Proxy structure
11+
12+
##### HTTP / HTTPS Proxy
13+
```json
14+
{
15+
"ip address": "185.140.234.18",
16+
"port": "8080",
17+
"code": "ir",
18+
"country": "iran",
19+
"anonymity": "transparent",
20+
"google": "no",
21+
"https": "no",
22+
"last checked": "5 minutes ago"
23+
}
24+
```
25+
26+
##### SOCKS Proxy
27+
```json
28+
{
29+
"ip address": "185.140.234.18",
30+
"port": "8080",
31+
"code": "ir",
32+
"country": "iran",
33+
"anonymity": "transparent",
34+
"version": "socks4",
35+
"https": "no",
36+
"last checked": "5 minutes ago"
37+
}
38+
```
39+
### Procedures to run
40+
41+
> Note: Adding cronjobs for below routines.
42+
43+
- `main/routine.py`: Run every hour, every day
44+
- `main/update.py`: Run every 15 minutes.

proxies/cache_server/__init__.py

Whitespace-only changes.

proxies/cache_server/config.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, unicode_literals
3+
4+
import os
5+
from os.path import join, dirname
6+
from dotenv import load_dotenv
7+
from elasticsearch import Elasticsearch
8+
9+
from ..random_proxies.settings import BASE_URL, SSL_URL, SOCKS_URL
10+
from ..random_proxies.log import logger
11+
from ..random_proxies.utils import fetch, parse_response
12+
from ..random_proxies.proxy_health import is_good_proxy
13+
14+
# Load environment variables from the `.env` file next to this module.
env_path = join(dirname(__file__), '.env')
print('ENV PATH:', env_path)
load_dotenv(env_path)

# GLOBALS
elastic_username = 'elastic'
elastic_password = os.environ.get('ELASTIC_PASSWORD')
elastic_uri = 'http://localhost:9200'

# Shared Elasticsearch client; imported by the other cache_server modules.
es = Elasticsearch(
    [elastic_uri],
    http_auth=(elastic_username, elastic_password)
)

# Create each required index only when it does not exist yet.
# `ignore=400` additionally tells the client not to raise on an HTTP 400
# response from the create call.
for _index_name in ('proxies', 'recents'):
    if not es.indices.exists(index=_index_name):
        es.indices.create(index=_index_name, ignore=400)
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
version: '3'
2+
3+
services:
4+
elasticsearch:
5+
image: docker.elastic.co/elasticsearch/elasticsearch:7.6.2
6+
container_name: cache_elasticsearch
7+
environment:
8+
- cluster.name=cache-server
9+
- discovery.type=single-node
10+
- xpack.security.enabled=true
11+
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
12+
- ES_JAVA_OPTS=-Xms512m -Xmx512m
13+
volumes:
14+
- data:/usr/share/elasticsearch/data
15+
ports:
16+
- 9200:9200
17+
networks:
18+
- elastic
19+
restart: always
20+
21+
volumes:
22+
data:
23+
driver: local
24+
25+
networks:
26+
elastic:
27+
driver: bridge

proxies/cache_server/routine.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, unicode_literals
3+
4+
from elasticsearch import helpers
5+
6+
from time import time
7+
8+
from proxies.cache_server.config import es
9+
from proxies.cache_server.config import fetch, parse_response
10+
from proxies.cache_server.config import is_good_proxy
11+
from proxies.cache_server.config import logger
12+
from proxies.cache_server.config import BASE_URL, SSL_URL, SOCKS_URL
13+
14+
def _add(proxies):
    """Bulk-index the given proxies into the ``proxies`` index.

    Each proxy becomes one bulk action whose ``_id`` is ``"<ip>:<port>"``,
    built from the proxy's ``'ip address'`` and ``'port'`` entries.
    Nothing is sent when ``proxies`` is empty. Any exception raised while
    indexing is logged through ``logger.error`` instead of propagating.

    Args:
        proxies: iterable of proxy dicts, each providing at least the
            ``'ip address'`` and ``'port'`` keys.
    """
    # NOTE(review): the proxy is stored under a "doc" key, so it presumably
    # ends up nested as {"doc": {...}} in the indexed document rather than
    # flat -- confirm whether "_source" was intended.
    bulk_actions = []
    for entry in proxies:
        bulk_actions.append({
            "_index": "proxies",
            "_type": "proxy",
            "_id": entry['ip address'] + ':' + entry['port'],
            "doc": entry
        })

    if not proxies:
        return

    try:
        started = time()
        helpers.bulk(es, bulk_actions)
        finished = time()
        print('Time taken to add to index:', finished - started)
    except Exception as err:
        logger.error(
            'An exception of type {0} occurred.\nArguments: {1!r}'.format(
                type(err).__name__, err.args
            )
        )
34+
35+
def _check():
    """Fetch proxies from every source URL and keep the ones that work.

    The proxy lists behind ``BASE_URL``, ``SSL_URL`` and ``SOCKS_URL`` are
    fetched and parsed, then each proxy is probed with ``is_good_proxy``.
    An exception raised while probing a proxy is logged and that proxy is
    skipped; it does not abort the run.

    Returns:
        list: the proxy dicts for which ``is_good_proxy`` returned a truthy
        value, in the order they were fetched.
    """
    urls = [BASE_URL, SSL_URL, SOCKS_URL]
    proxies = []
    # Fetch all the proxies from these urls
    for url in urls:
        res = fetch(url)
        # Passing empty conditions -- presumably so that no proxy is
        # filtered out while parsing (confirm against `parse_response`).
        proxies.extend(parse_response(res, {}))

    # Check if they work
    working_proxies = []
    for proxy in proxies:
        ip = proxy['ip address'] + ':' + proxy['port']
        # A 'version' key implies a SOCKS proxy: prefix the address with
        # its scheme (e.g. "socks4://"). The value must come from the proxy
        # dict; a bare `version` name is undefined and raised NameError.
        if 'version' in proxy:
            ip = proxy['version'] + '://' + ip
        protocol = 'https' if proxy['https'] == 'yes' else 'http'

        try:
            # Only if it works
            if is_good_proxy(ip, protocol=protocol):
                working_proxies.append(proxy)
        except Exception as e:
            template = 'An exception of type {0} occurred.\nArguments: {1!r}'
            message = template.format(type(e).__name__, e.args)
            logger.error(message)

    return working_proxies
63+
64+
if __name__ == '__main__':
    # Time one full fetch-and-validate pass when run as a script.
    started = time()
    proxies = _check()
    # Indexing of the results is currently disabled:
    # _add(proxies)
    finished = time()
    print('Total time:', finished - started)

proxies/cache_server/update.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, unicode_literals

proxies/random_proxies/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, unicode_literals
3+
4+
from proxies.random_proxies.proxy import random_proxy

0 commit comments

Comments
 (0)