-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathqueryWOKO.py
150 lines (122 loc) · 4.13 KB
/
queryWOKO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from urllib.request import urlopen
from urllib.parse import urljoin
import ssl
import smtplib
import time
import random
from bs4 import BeautifulSoup
import yaml
# Load the runtime settings once at import time; the rest of the script reads
# them through the module-level ``config`` mapping.
# The encoding is pinned to UTF-8 so that non-ASCII values in the file are
# decoded the same way on every platform instead of using the locale default.
with open("config.yaml", "r", encoding="utf-8") as opened_file:
    config = yaml.safe_load(opened_file)
def send_message(config, body=""):
    """
    Send a notification email through Gmail's SMTP server (STARTTLS, port 587).

    :param config: mapping providing ``receiver_email``, ``sender_email`` and
        ``password`` (the app-password of the sender account).
    :param body: the body of the email.
    :raises ValueError: if one of the required config entries is missing or empty.
    :return: None
    """
    # Imported locally so the function carries its own dependency.
    from email.message import EmailMessage

    receiver_email = config.get('receiver_email')
    sender_email = config.get('sender_email')
    password = config.get('password')

    # Fail early with a readable error instead of an obscure failure inside
    # smtplib after the connection has already been opened.
    missing = [
        key
        for key in ('receiver_email', 'sender_email', 'password')
        if not config.get(key)
    ]
    if missing:
        raise ValueError(f"Missing email settings in config: {', '.join(missing)}")

    port = 587  # For starttls
    smtp_server = "smtp.gmail.com"

    # Build a proper MIME message so the charset is declared; scraped listings
    # can contain non-ASCII characters that raw header-less bytes would garble.
    message = EmailMessage()
    message["Subject"] = "You have a new post"
    message["From"] = sender_email
    if isinstance(receiver_email, str):
        message["To"] = receiver_email
    else:
        # Several recipients given as a list/tuple.
        message["To"] = ", ".join(receiver_email)
    message.set_content(f"\n{body}\n---\n\n\nCheers,\nYour team")

    context = ssl.create_default_context()
    with smtplib.SMTP(smtp_server, port) as server:
        server.ehlo()  # Can be omitted
        server.starttls(context=context)
        server.ehlo()  # Can be omitted
        server.login(sender_email, password)
        server.send_message(
            message,
            from_addr=sender_email,
            to_addrs=receiver_email,
        )
    print('Message sent!')
def query_room_website(url):
    """
    Scrape one listing page and render its table content as plain text.

    Every table row that has exactly two cells contributes the stripped text
    of both cells (one per line) followed by an empty line. The listing URL is
    appended at the end.

    :param url: absolute URL of the listing page.
    :return: the text used as the email body.
    """
    print(f'Scraping {url}')
    # The context manager closes the HTTP response as soon as it has been read.
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, features="html.parser")

    parts = []
    # "tr" is table row
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) == 2:
            parts.extend(cell.text.strip() + '\n' for cell in cells)
            # Blank line between rows.
            parts.append('\n')
    parts.append(f'Listing link\n{url}')
    return ''.join(parts)
def query_main_website() -> list:
    """
    Find all listings on the main WOKO website.

    Reads ``config["url_woko"]`` (page to scrape) and ``config["city"]``
    (which group of listings to pick) from the module-level configuration.

    :return: list of absolute listing URLs; an empty list when the matching
        button or the div it reveals cannot be found.
    """
    url = config["url_woko"]
    # The context manager closes the HTTP response as soon as it has been read.
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, features="html.parser")

    zurich_variations = ('zurich', 'zürich', 'zuerich')
    winterthur_variations = ('winterthur', 'wädenswil', 'waedenswil')

    # Resolve once which spellings identify the configured city's button.
    wanted_city = config['city'].lower()
    if wanted_city in zurich_variations:
        city_variations = zurich_variations
    elif wanted_city in winterthur_variations:
        city_variations = winterthur_variations
    else:
        city_variations = ()

    group_id = ''
    for button in soup.find_all('button'):
        # The button and div containing the listing are linked with a data-gruppeid number.
        candidate_id = button.get('data-gruppeid')
        if candidate_id is None:
            continue
        button_text = button.text.lower()
        # Take the first button that names the configured city, or the
        # generic "free rooms" button.
        if (
            any(city in button_text for city in city_variations)
            or 'free rooms' in button_text
        ):
            group_id = candidate_id
            break

    if group_id == '':
        print("Couldn't find the room buttons")
        return []

    # This is the div which the button reveals
    div = soup.find('div', attrs={'id': f'GruppeID_{group_id}'})
    if div is None:
        print(f"Couldn't find the listings container GruppeID_{group_id}")
        return []

    # Anchors without an href cannot point to a listing, so they are skipped.
    return [urljoin(url, link['href']) for link in div.find_all('a', href=True)]
def sleep():
    """
    Pause between two polls.

    The pause is ``config["timer"]`` seconds, randomly either taken once or
    doubled; the duration is announced in whole minutes before sleeping.

    :return: None
    """
    base_delay = config["timer"]
    factor = random.choice([1, 2])
    delay = base_delay * factor
    minutes = delay // 60
    print(f"Sleep for: {minutes}min.")
    time.sleep(delay)
# Baseline snapshot: listings that already exist at startup are not reported.
listing_urls = query_main_website()
if not listing_urls:
    print('No listings found')
    if config['test_email']:
        print('Cannot test without any listings, exiting')
        # Raised directly instead of calling exit(), which is only injected
        # by the site module and is not guaranteed to be available.
        raise SystemExit

if config['test_email']:
    # Forget one known listing so the first poll reports it as new and the
    # email path gets exercised.
    listing_urls.pop()

# Poll forever; every pass ends with a randomized pause.
while True:
    try:
        next_listing_urls = query_main_website()
    except OSError as err:
        # urllib's URLError/HTTPError derive from OSError; a transient network
        # failure should not stop the monitor, so wait and try again.
        print(f"Could not query the website ({err}), retrying later")
        sleep()
        continue

    new_listing_urls = set(next_listing_urls) - set(listing_urls)
    if new_listing_urls:
        for new_listing_url in new_listing_urls:
            send_message(
                body=query_room_website(new_listing_url),
                config=config,
            )
        print("Found!")
        # Only advance the baseline after the notifications went out.
        listing_urls = next_listing_urls
    else:
        print(f"Still: {len(next_listing_urls)} rooms...")
    sleep()