Skip to content

Commit ad3f20f

Browse files
Added manga_reader scraper.
1 parent 393ecd7 commit ad3f20f

File tree

4 files changed

+161
-0
lines changed

4 files changed

+161
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/manga
2+
.DS_Store
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[[source]]
2+
url = "https://pypi.org/simple"
3+
verify_ssl = true
4+
name = "pypi"
5+
6+
[packages]
7+
"bs4" = "*"
8+
selenium = "*"
9+
requests = "*"
10+
11+
[dev-packages]
12+
13+
[requires]
14+
python_version = "3.6"

Automation/src/Manga_Downloader/Pipfile.lock

Lines changed: 81 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#needs polishing
2+
#Searches and downloads the top Manga result page by page from https://www.mangareader.net
3+
#author- Senthil Kumar @Rats12
4+
5+
6+
import requests
7+
from bs4 import BeautifulSoup
8+
import os
9+
10+
# Ask the user what to search for and fix the base URL of the site we scrape.
query = input('What manga would you like to search for?: ')
website = 'https://www.mangareader.net'
12+
13+
def download_image(url, manga_name):
    """Download a single page image from *url* into manga/<manga_name>/.

    The saved file keeps its original basename (the final path component
    of the URL). The target directory is created on demand.
    """
    # stream=True: write the body chunk-by-chunk instead of buffering the
    # whole image in memory before iter_content runs.
    response = requests.get(url, stream=True)
    file_name = os.path.split(url)[1]

    # exist_ok replaces the original check-then-create pair
    # (os.path.exists + makedirs), which was race-prone and redundant —
    # makedirs already creates intermediate directories.
    os.makedirs('manga/%s' % manga_name, exist_ok=True)

    with open("manga/%s/%s" % (manga_name, file_name), 'wb') as f:
        for chunk in response.iter_content(4096):
            f.write(chunk)
26+
27+
28+
# Fetch the search page for the query and collect every result container.
search_results = BeautifulSoup(
    requests.get('%s/search/?w=%s' % (website, query)).text,
    'html.parser'
).find_all("div", class_="manga_name")

# Drill into each result block to reach the manga's anchor tag.
# NOTE(review): the contents[...] path assumes the site's exact markup —
# verify against a live search page if results come back empty.
manga_list = [manga.contents[1].contents[1].contents[0] for manga in search_results]

if len(manga_list) > 1:
    # Multiple matches: show a numbered menu and let the user pick.
    # (A plain for loop — the original abused a list comprehension for
    # its print side effect.)
    for i, manga in enumerate(manga_list):
        print('%i) %s' % (i, manga.text))
    choice = int(input('Choose which manga you would like to download: '))
    chosen_manga = manga_list[choice]
elif len(manga_list):
    # Exactly one match: take it without prompting.
    chosen_manga = manga_list[0]
else:
    # No matches at all: report and stop the script.
    print('Sorry but no search results were found.')
    exit()
47+
48+
# Create a recursive function to scrape through manga pages
49+
def scrape_pages(url, manga_name):
    """Walk the manga reader's pages starting at *url*, saving each image.

    Each page's ``#imgholder`` anchor links to the next page; that chain
    is followed iteratively. (The original recursed once per page, which
    overflows Python's ~1000-frame recursion limit on long manga.)
    """
    while True:
        # Target the imgholder element's first <a> tag on the current page.
        holder = BeautifulSoup(
            requests.get(url).text,
            'html.parser'
        ).find(id='imgholder')
        # Guard: a page without #imgholder would otherwise raise
        # AttributeError on `.a`.
        image = holder.a if holder is not None else None

        # A link wrapping an <img id="img"> means there is a page to save
        # and a next page to visit.
        if image and image.find(id='img'):
            download_image(image.find(id='img').get('src'), manga_name)
            url = '%s%s' % (website, image.get('href'))
        else:
            print('No more pages found!')
            break
62+
63+
# Kick off the scrape at page 1 of the manga the user chose.
start_url = '%s%s/1' % (website, chosen_manga.get('href'))
scrape_pages(start_url, chosen_manga.text)

0 commit comments

Comments
 (0)