|
1 |
| -import getpass, urllib, os, re |
| 1 | +import getpass, requests, os, re |
2 | 2 | from pathlib import Path
|
3 | 3 | from selenium.webdriver.support.ui import Select
|
4 | 4 | from selenium import webdriver
|
5 | 5 | from selenium.webdriver.chrome.options import Options
|
6 | 6 | chrome_options = Options()
|
7 | 7 | chrome_options.add_argument("--headless")
|
8 | 8 | from selenium.webdriver.common.keys import Keys
|
9 |
| - |
| 9 | + |
| 10 | +#Workaround for when pyopenssl is installed and we want weak keys |
| 11 | +try: |
| 12 | + from urllib3.contrib import pyopenssl |
| 13 | + pyopenssl.extract_from_urllib3() |
| 14 | +except ImportError: |
| 15 | + pass |
10 | 16 |
|
11 | 17 | # Login to SIS
|
12 | 18 | def login(driver):
|
@@ -104,10 +110,13 @@ def saveImagesToFolder(term, course, class_list):
|
104 | 110 | # regardless if email or not, get image if the current dict key is img url
|
105 | 111 | if k == "img url":
|
106 | 112 | img_url = class_list[i].get(k)
|
| 113 | + # download and save the image to a specific folder (term/course_section) from the image url |
107 | 114 | img_name = rcs_id+".png"
|
108 | 115 | filepath = path / img_name
|
109 |
| - urllib.request.urlretrieve(img_url, str(filepath)) |
110 |
| - |
| 116 | + #TODO: Get SSL cipher setting to work with requests, right now still getting handshake errors |
| 117 | + r = requests.get(img_url) |
| 118 | + with open(str(filepath),'wb') as f: |
| 119 | + f.write(r.content) |
111 | 120 |
|
112 | 121 | # returns the class list of dictionaries of info collected about each student's img url, name, and email
|
113 | 122 | def getStudentInfoFromCourse(driver, select_course, index, class_list):
|
@@ -138,9 +147,18 @@ def getStudentInfoFromCourse(driver, select_course, index, class_list):
|
138 | 147 | img_url = driver.current_url
|
139 | 148 | driver.get(img_url)
|
140 | 149 |
|
141 |
| - # image |
142 |
| - image = driver.find_elements_by_tag_name('img')[6].get_attribute('src') |
143 |
| - student_record['img url'] = image |
| 150 | + # image, initialize to empty string |
| 151 | + student_record['img url'] = "" |
| 152 | + image_arr = driver.find_elements_by_tag_name('img') |
| 153 | + |
| 154 | + #do search through all <img> tags for first non-header-layout tag |
| 155 | + #have to skip 2 more <img> tags because they are transparent images |
| 156 | + for i in range(len(image_arr)): |
| 157 | + if image_arr[i].get_attribute('NAME') != "web_tab_corner_right": |
| 158 | + student_record['img url'] = image_arr[i+2].get_attribute('src') |
| 159 | + #Uncomment this line to print the image URLs we are attempting, useful for debugging |
| 160 | + #print("found non-match, +2 is " + student_record['img url']) |
| 161 | + break |
144 | 162 |
|
145 | 163 | # name
|
146 | 164 | info_name = driver.find_elements_by_class_name('plaintable')[4].find_element_by_tag_name('tbody').find_element_by_tag_name('tr').find_elements_by_tag_name('td')[1].text
|
@@ -212,11 +230,14 @@ def getInfoFromCourse(driver):
|
212 | 230 | print("Invalid answer! Try again!")
|
213 | 231 |
|
214 | 232 | if __name__ == "__main__":
|
| 233 | + #Just setting the default ciphers (for this session) to be weak DES/SHA for SIS compatibility |
| 234 | + #Be careful about navigating to any other sites... |
| 235 | + requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'DES-CBC3-SHA:AES128-SHA' |
215 | 236 | driver = webdriver.Chrome(chrome_options=chrome_options)
|
216 | 237 | try:
|
217 | 238 | # open SIS
|
218 | 239 | driver.get('https://sis.rpi.edu/')
|
219 |
| - # if login is valid with correct User ID or PIN, econtinue the program by collecting data |
| 240 | + # if login is valid with correct User ID or PIN, continue the program by collecting data |
220 | 241 | if login(driver):
|
221 | 242 | getInfoFromCourse(driver)
|
222 | 243 | finally:
|
|
0 commit comments