-
Notifications
You must be signed in to change notification settings - Fork 0
/
flight_scraper.py
89 lines (80 loc) · 6.2 KB
/
flight_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
#------------------------------------------For headless driver------------------------------------------#
from selenium.webdriver.chrome.options import Options
DRIVER_PATH = "/Volumes/WD Drive/Applications/Chromedriver/chromedriver"
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options = options, executable_path = DRIVER_PATH)
driver.get("https://google.com/flights")
#-------------------------------------------------------------------------------------------------------#
#------------------------------------------For driver with GUI------------------------------------------#
#DRIVER_PATH = "/Volumes/WD Drive/Applications/Chromedriver/chromedriver"
#driver = webdriver.Chrome(executable_path = DRIVER_PATH)
#driver.get("https://www.google.com/flights")
#-------------------------------------------------------------------------------------------------------#
DEPARTING_AIRPORT = "Salt Lake City" # Enter departing airport here
MAX_PRICE = 150 # Enter max price here
SENDER_EMAIL = "@gmail.com" # Enter sender email here
PASSWORD = "pass" # Insert password here
RECEIVER_EMAIL = "@gmail.com" # Insert reciver email here
try:
#-------------------------------------Enter in departing airport-------------------------------------#
try:
whereFromInput = driver.find_element_by_xpath("//*[@id=\"i6\"]/div[1]/div/div/div[1]/div/div/input")
plainMessage = "Subject: Cheap Flights from " + whereFromInput.get_attribute("value") + "\n\n"
whereFromInput.click()
whereFromInput.send_keys(DEPARTING_AIRPORT)
whereFromInput.send_keys(Keys.ENTER)
time.sleep(1)
airportSelector = driver.find_element_by_xpath("/html/body/c-wiz[2]/div/div[2]/div/c-wiz/div/c-wiz/div[2]/div[1]/div[1]/div[2]/div[1]/div[6]/div[2]/div[6]/div/ul/li[1]/div[2]/div[1]/div")
airportSelector.click()
plainMessage = "Subject: Cheap Flights from " + DEPARTING_AIRPORT + "\n\n"
except:
print("Error: Scraping done on local location instead of specified departing airport.")
#-----------------------------------------Click search button-----------------------------------------#
searchButton = driver.find_element_by_xpath("//*[@id=\"yDmH0d\"]/c-wiz[2]/div/div[2]/div/c-wiz/div/c-wiz/div[2]/div[1]/div[2]/div/button")
searchButton.click()
#-------------------------------Click bags button and select 1 carry-on-------------------------------#
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "/html/body/c-wiz[2]/div/div[2]/div/div[1]/div/c-wiz/div[2]/div/div[1]/div[1]/div/div/div/div/div[2]/div[1]/div/div[5]")))
bagsButton = driver.find_element_by_xpath("/html/body/c-wiz[2]/div/div[2]/div/div[1]/div/c-wiz/div[2]/div/div[1]/div[1]/div/div/div/div/div[2]/div[1]/div/div[5]")
bagsButton.click()
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "/html/body/c-wiz[2]/div/div[2]/div/div[1]/div/c-wiz/div[2]/div/div[1]/div[1]/div/div/div/div[2]/div[2]/div[1]/div[1]/section/div[2]/div/div[2]/div[2]/div/div/button[2]")))
carryonButton = driver.find_element_by_xpath("/html/body/c-wiz[2]/div/div[2]/div/div[1]/div/c-wiz/div[2]/div/div[1]/div[1]/div/div/div/div[2]/div[2]/div[1]/div[1]/section/div[2]/div/div[2]/div[2]/div/div/button[2]")
carryonButton.click()
#--------------------------------Change calendar dates to flexible trip--------------------------------#
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "/html/body/c-wiz[2]/div/div[2]/div/div/div/c-wiz/div[2]/div/div[1]/div[1]/section/div/div[1]/div[1]/div[1]/div[2]/div[2]/div/div/div[1]/div/div/div/div[1]/div/div[1]/div/input"))).click()
time.sleep(1)
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "/html/body/c-wiz[2]/div/div[2]/div/div/div/c-wiz/div[2]/div/div[1]/div[1]/section/div/div[1]/div[1]/div[1]/div[2]/div[2]/div/div/div[2]/div/div[2]/div/div[1]/div/div/div/span/button[2]/span[2]"))).click()
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "/html/body/c-wiz[2]/div/div[2]/div/div[1]/div/c-wiz/div[2]/div/div[1]/div[1]/section/div/div[1]/div[1]/div[1]/div[2]/div[2]/div/div/div[2]/div/div[3]/div[1]/button"))).click()
time.sleep(3)
#-------------------------------------Scrape all listings generated-------------------------------------#
ticketList = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "/html/body/c-wiz[2]/div/div[2]/div/div[1]/div/c-wiz/div[2]/div/div[1]/main/div/div[2]/ol")))
for listing in ticketList.find_elements_by_xpath(".//li"):
try:
destination = listing.find_element_by_xpath(".//div/div[2]/div[1]/h3").text
price = listing.find_element_by_xpath(".//div/div[2]/div[2]/div/span/span").text[1:]
duration = listing.find_element_by_xpath(".//div/div[2]/div[1]/div[1]").text
if int(price) <= MAX_PRICE:
plainMessage += destination + " - $" + price + "\n" + duration + "\n\n"
except:
break
#--------------------------------Add the current URL to the emailMessage---------------------------------#
plainMessage += "Find all listed flights here:\n" + driver.current_url
finally:
#------------------------------------------Create and send email------------------------------------------#
import smtplib, ssl
port = 465
context = ssl.create_default_context()
with smtplib.SMTP_SSL("smtp.gmail.com", port, context=context) as server:
server.login(SENDER_EMAIL, PASSWORD)
server.sendmail(SENDER_EMAIL, RECEIVER_EMAIL, plainMessage.encode("utf-8"))
#---------------------------------------------------------------------------------------------------------#
driver.quit()
print("Scraping completed. Email sent.")