forked from Afnan-Navaz/Amazon_Scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
70 lines (69 loc) · 1.93 KB
/
data.py
File metadata and controls
70 lines (69 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from urllib.request import Request, urlopen
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
import requests
import random
import re
from torrequest import TorRequest
def _text_or_blank(tag):
    """Return ``tag.text``, or a single space when ``tag`` is ``None``."""
    return tag.text if tag is not None else " "


def _parse_product_page(soup):
    """Extract product fields from a parsed amazon.in product page.

    Args:
        soup: A ``BeautifulSoup`` document for the product page.

    Returns:
        dict: Always contains the keys ``"TITLE"``, ``"AUTHOR"``,
        ``"NO OF REVIEWS"`` and ``"RATING"``; each is ``" "`` (one space)
        when the corresponding element is absent. In addition, one entry per
        price option found under the ``twister`` div (option label -> price
        text) and one entry per two-word product-detail bullet
        (first word -> ``[second word]``).
    """
    dic = {}

    # The title span id depends on the first <span> inside <h1 id="title">:
    # "productTitle" when that span carries this id, "ebooksProductTitle"
    # otherwise. With no heading (or no span in it) the title stays blank.
    heading = soup.find('h1', id='title')
    heading_span = heading.span if heading is not None else None
    if heading_span is None:
        dic["TITLE"] = " "
    else:
        # .get() instead of ['id']: a span without an id attribute must not
        # raise KeyError and abort the whole scrape.
        if heading_span.get('id') == "productTitle":
            xid = 'productTitle'
        else:
            xid = 'ebooksProductTitle'
        dic["TITLE"] = _text_or_blank(soup.find('span', id=xid))

    dic["AUTHOR"] = _text_or_blank(soup.find('a', class_='contributorNameID'))
    dic["NO OF REVIEWS"] = _text_or_blank(
        soup.find('span', id='acrCustomerReviewText'))

    rating = soup.find('span', id='acrPopover')
    # Missing element or missing title attribute both fall back to blank.
    dic["RATING"] = rating.get('title', " ") if rating is not None else " "

    # Price options: label span -> price span; options lacking either span
    # are skipped.
    twister = soup.find('div', id='twister')
    if twister is not None:
        for option in twister.find_all('div', class_='top-level'):
            label = option.find('span', class_='a-color-base')
            price = option.find('span', class_='a-color-price')
            if label is not None and price is not None:
                dic[label.text] = price.text

    # Product-detail bullets: only items whose text splits into exactly two
    # space-separated parts are kept; the value is stored as a one-item list.
    # NOTE(review): splitting on a single space keeps very few bullets --
    # confirm this separator is the intended one.
    details = soup.find('div', id='detail_bullets_id')
    if details is not None and details.ul is not None:
        for item in details.ul.find_all('li'):
            parts = str(item.text).split(" ")
            if len(parts) == 2:
                dic[parts[0]] = [parts[1]]

    return dic


def pageSpr(link, timeout=30):
    """Fetch an amazon.in product page over Tor and return its details.

    Args:
        link: Product identifier appended to ``https://www.amazon.in/dp/``
            (converted with ``str``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        dict: See ``_parse_product_page`` for the keys produced.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (connection failure or timeout).
    """
    # Send a randomly chosen User-Agent; previously the text "ua.random" was
    # part of the header string itself, so no randomisation took place.
    headers = {'User-agent': UserAgent().random}
    page_url = 'https://www.amazon.in/dp/' + str(link) + '?tag=YOURASSOCIATEID'

    # The context manager closes the Tor controller when done, and tr.get()
    # sends the request through the Tor session whose identity was just
    # reset (a plain requests.get() would bypass Tor entirely).
    with TorRequest(1234) as tr:
        tr.reset_identity()
        html = tr.get(page_url, headers=headers, timeout=timeout).text

    return _parse_product_page(BeautifulSoup(html, 'lxml'))