Skip to content

Commit 00b8d87

Browse files
holzbh authored and bmcutler committed
Scraper now supports any ordering/number of columns in student table (#5)
* Scraper now supports any ordering/number of columns in student table
* comments
1 parent 5d6c0cd commit 00b8d87

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
 # InstructorTools
-Tools for instructors who use Submitty in their classes
+Tools for instructors who use Submitty in their classes.
+
+Instructions for usage of the Image Scraper tool are here:
+http://submitty.org/instructor/image_scraper
+
+https://github.com/Submitty/submitty.github.io

SIS_Images_Scraper.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ def saveImagesToFolder(term, course, class_list):
113113
# download and save the image to a specific folder (term/course_section) from the image url
114114
img_name = rcs_id+".png"
115115
filepath = path / img_name
116-
#TODO: Get SSL cipher setting to work with requests, right now still getting handshake errors
117116
r = requests.get(img_url)
118117
with open(str(filepath),'wb') as f:
119118
f.write(r.content)
@@ -137,12 +136,24 @@ def getStudentInfoFromCourse(driver, select_course, index, class_list):
137136
# find link for pic
138137
student_list = driver.find_elements_by_class_name('datadisplaytable')[2].find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')
139138

139+
# find which column is the "Student Name" column, since it isn't always the same column number
140+
student_headers = student_list[0].find_elements_by_tag_name('th')
141+
stu_col = -1
142+
for i in range(len(student_headers)):
143+
if student_headers[i].text == "Student Name":
144+
stu_col = i
145+
if stu_col <0:
146+
driver.back()
147+
driver.back()
148+
print("Error: Could not find a column labeled \"Student Name\"!")
149+
return 0
150+
140151
# loop through list of students to get image, name, and email
141152
# all info collected from for loop (img url, name, email) put into dict
142153
for s in range(1, len(student_list)):
143154
student_record = {}
144155
student = driver.find_elements_by_class_name('datadisplaytable')[2].find_element_by_tag_name('tbody').find_elements_by_tag_name('tr')[s]
145-
student.find_elements_by_tag_name('td')[1].find_element_by_class_name('fieldmediumtext').click()
156+
student.find_elements_by_tag_name('td')[stu_col].find_element_by_class_name('fieldmediumtext').click()
146157

147158
img_url = driver.current_url
148159
driver.get(img_url)
@@ -219,6 +230,7 @@ def getInfoFromCourse(driver):
219230
elif answer == "exit":
220231
return
221232
elif answer == "y":
233+
print ("Getting student pictures... (this could take a few seconds per student)")
222234
# get the class list of dictionary of email, name, and image per student
223235
class_list = getStudentInfoFromCourse(driver, select_course, index, class_list)
224236
if class_list == 0:

0 commit comments

Comments
 (0)