|
| 1 | +import requests |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +import os |
| 4 | + |
# Manga title to search for, taken interactively from the user.
query = input('What manga would you like to search for?: ')
# Base URL of the site being scraped; relative hrefs from the pages
# are joined onto this to build absolute URLs.
website = 'https://www.mangareader.net'

def download_image(url, manga_name):
    """Download a single page image into manga/<manga_name>/<basename(url)>.

    url        -- absolute URL of the image to fetch
    manga_name -- sub-directory name under ./manga to store the file in

    Raises requests.HTTPError on a non-2xx response instead of silently
    saving an error page to disk.
    """
    # stream=True keeps large images out of memory; they are written in
    # 4 KiB chunks below.
    response = requests.get(url, stream=True)
    response.raise_for_status()

    file_name = os.path.split(url)[1]

    # exist_ok=True replaces the original two-step exists()/makedirs()
    # dance, which was racy and needed two separate checks.
    target_dir = os.path.join('manga', manga_name)
    os.makedirs(target_dir, exist_ok=True)

    with open(os.path.join(target_dir, file_name), 'wb') as f:
        for chunk in response.iter_content(4096):
            f.write(chunk)

| 22 | + |
# Get the search page with query and find all results
search_results = BeautifulSoup(
    requests.get('%s/search/?w=%s' % (website, query)).text,
    'html.parser'
).find_all("div", class_="manga_name")

# Extract the element carrying the manga's name/href from each result.
# NOTE(review): the .contents[1].contents[1].contents[0] path is tightly
# coupled to the site's markup — verify if the layout changes.
manga_list = [manga.contents[1].contents[1].contents[0] for manga in search_results]

if len(manga_list) > 1:
    # Several hits: show a numbered menu and let the user pick one.
    # (A plain for-loop replaces the original list comprehension that was
    # used only for its print side effect.)
    for i, manga in enumerate(manga_list):
        print('%i) %s' % (i, manga.text))
    try:
        choice = int(input('Choose which manga you would like to download: '))
        chosen_manga = manga_list[choice]
    except (ValueError, IndexError):
        # Non-numeric or out-of-range input previously crashed with a
        # traceback; exit with a message instead.
        print('Invalid selection.')
        raise SystemExit(1)
elif manga_list:
    # Exactly one hit: no need to ask.
    chosen_manga = manga_list[0]
else:
    print('Sorry but no search results were found.')
    raise SystemExit()

# Walk through the manga's pages, following each page's next-page link.
def scrape_pages(url, manga_name):
    """Download every page image starting at *url* until no next page exists.

    url        -- absolute URL of the first page to scrape
    manga_name -- forwarded to download_image() as the target sub-directory

    The original recursive version added one stack frame per page and hit
    Python's recursion limit on long series; this loop has no such cap and
    performs the same requests in the same order.
    """
    while True:
        # Target the imgholder element; it may be absent if the page
        # layout differs (guarded below instead of raising AttributeError).
        holder = BeautifulSoup(
            requests.get(url).text,
            'html.parser'
        ).find(id='imgholder')
        image = holder.a if holder else None

        # If the tag was found, and there is an image with the id 'img',
        # there is a link and image to scrape.
        if image and image.find(id='img'):
            download_image(image.find(id='img').get('src'), manga_name)
            # The anchor wrapping the image links to the next page.
            url = '%s%s' % (website, image.get('href'))
        else:
            print('No more pages found!')
            return

# Start scraping at page 1 of the manga the user selected.
first_page_url = '%s%s/1' % (website, chosen_manga.get('href'))
scrape_pages(first_page_url, chosen_manga.text)
0 commit comments