Skip to content

Commit b6fac5d

Browse files
Merge pull request #24 from conspicuousClockwork/feature/manga_reader
Added manga_reader scraper. Thanks for your contribution.
2 parents 5f68bad + ae6e3bb commit b6fac5d

File tree

4 files changed

+156
-0
lines changed

4 files changed

+156
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/manga
2+
.DS_Store
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[[source]]
2+
url = "https://pypi.org/simple"
3+
verify_ssl = true
4+
name = "pypi"
5+
6+
[packages]
7+
"bs4" = "*"
8+
selenium = "*"
9+
requests = "*"
10+
11+
[dev-packages]
12+
13+
[requires]
14+
python_version = "3.6"

Automation/src/Manga_Downloader/Pipfile.lock

Lines changed: 81 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
import os
4+
5+
# Base URL of the site being scraped; relative links are appended to it.
website = 'https://www.mangareader.net'

# Search text typed by the user when the script starts.
query = input('What manga would you like to search for?: ')
7+
8+
def download_image(url, manga_name):
    """Download the image at ``url`` into ``manga/<manga_name>/``.

    The saved file is named after the last path component of ``url``.

    Args:
        url: URL of the image to fetch.
        manga_name: Name of the sub-directory of ``manga`` to save into.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never written to disk under an image file name.
    """
    file_name = os.path.split(url)[1]
    target_dir = os.path.join('manga', manga_name)

    # stream=True defers fetching the body until iter_content() reads it in
    # chunks; the context manager releases the connection when done.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # exist_ok creates 'manga' and the sub-directory in one call and
        # avoids the race of a separate os.path.exists() check.
        os.makedirs(target_dir, exist_ok=True)

        with open(os.path.join(target_dir, file_name), 'wb') as f:
            for chunk in response.iter_content(4096):
                f.write(chunk)
21+
22+
23+
# Get the search page for the query and find all results. The query goes
# through `params` so requests URL-encodes it; spaces, '&' or '#' in the
# search text can no longer corrupt the request URL.
search_results = BeautifulSoup(
    requests.get('%s/search/' % website, params={'w': query}).text,
    'html.parser'
).find_all("div", class_="manga_name")

# Collect one element per result; its .text is shown as the title and its
# href is used to build the reader URL.
# NOTE(review): the positional .contents lookups depend on the exact markup
# of the search page -- confirm against the live site.
manga_list = [manga.contents[1].contents[1].contents[0] for manga in search_results]

if not manga_list:
    # Nothing matched the query: stop the script.
    print('Sorry but no search results were found.')
    raise SystemExit
elif len(manga_list) == 1:
    # A single hit needs no prompt.
    chosen_manga = manga_list[0]
else:
    # Several hits: list them and keep asking until a valid index is given,
    # instead of crashing on non-numeric or out-of-range input.
    for i, manga in enumerate(manga_list):
        print('%i) %s' % (i, manga.text))

    chosen_manga = None
    while chosen_manga is None:
        answer = input('Choose which manga you would like to download: ')
        try:
            choice = int(answer)
        except ValueError:
            choice = -1
        if 0 <= choice < len(manga_list):
            chosen_manga = manga_list[choice]
        else:
            print('Please enter a number between 0 and %i.' % (len(manga_list) - 1))
42+
43+
# Walk the reader pages one after another, saving the image found on each
def scrape_pages(url, manga_name):
    """Download every page image reachable from ``url``.

    Each page is expected to hold an element with id ``imgholder`` whose
    first ``a`` tag wraps an element with id ``img``. That element's ``src``
    is downloaded and the ``a`` tag's ``href`` gives the next page to visit.
    Scraping stops at the first page where any of these pieces is missing.

    Pages are visited in a loop, not by recursion, so a long run cannot
    exhaust the interpreter's recursion limit.

    Args:
        url: Absolute URL of the first page to scrape.
        manga_name: Passed to ``download_image`` as the target folder name.
    """
    while True:
        soup = BeautifulSoup(requests.get(url).text, 'html.parser')

        # Check each lookup separately: find() returns None when the element
        # is absent, and dereferencing that would raise AttributeError.
        holder = soup.find(id='imgholder')
        link = holder.a if holder is not None else None
        picture = link.find(id='img') if link is not None else None
        if picture is None:
            break

        download_image(picture.get('src'), manga_name)

        # Without an href there is no next page to request.
        next_href = link.get('href')
        if not next_href:
            break
        url = '%s%s' % (website, next_href)

    print('No more pages found!')
57+
58+
# Start scraping at the chosen manga's URL with '/1' appended; the link text
# is passed along as the manga name.
first_page = '%s%s/1' % (website, chosen_manga.get('href'))
scrape_pages(first_page, chosen_manga.text)

0 commit comments

Comments
 (0)