8
8
import regex as re
9
9
import json
10
10
import sys
11
+ import json
12
+
13
# Regular expressions used to carve the extracted audit text into pieces.
# Capture-group numbering is relied on by the parsing code in this file
# (e.g. ``section_obj.groups()[2]``, ``subsection_obj.groups()[5]`` and
# ``req_option_obj.group(1/3/4/7)``), so groups must not be added, removed
# or reordered without updating those call sites.

# A section header: a status token (``NO``, ``OK`` or ``+-``) followed by an
# upper-case title, optionally followed by a run of asterisks and more title
# words.
section_pattern = r"(NO|OK|\+\-)\s+(((\s+[A-Z0-9\:\&\/\']+)+)(\s+\*+(\s+(\s+([A-Z0-9\:\&\/\']+))+)?)?)"

# A subsection header: a ``-``/``+`` marker (optionally prefixed with ``ip``),
# an optional ``R`` flag and ``N)`` index, then the subsection title (group 6),
# delimited by wide whitespace or a line break on both sides.
subsection_pattern = r"(\s{3,}|\n\s*)(\-|\+|ip\s*(\-|\+))(\s+R\s)?(\s+[0-9]\))?\s*(([A-Z0-9(\/)\:\']([A-Za-z0-9\/\:\'\-]+\s)*)([A-Za-z0-9\/\:\-\']+))(\s{3,}|\n\s*)"

# A course entry line: a four-character alphanumeric token, a course code
# (letters, optional space, digits, optional trailing letters), a decimal
# number, a token made of letters and ``-``/``+``/``/``, an optional flag
# (``>S``, ``>X``, ``>-``, ``RP``, ``>D``, ``>R``) and a title.
# NOTE(review): presumably term / course / credits / grade / flag / title --
# confirm against a real audit.
# The course-code class is ``[a-zA-Z]``; the earlier ``[a-zA-z]`` spelling also
# admitted the punctuation between ``Z`` and ``a`` (``[ \ ] ^ _`` and backtick).
entry_pattern = r"(([a-zA-Z0-9]{4})\s+([a-zA-Z]+\s?[0-9]+([a-zA-Z]?)+)\s+([0-9]+\.[0-9]+)\s+([A-Za-z\-\+\/]+)\s+(\>S|\>X|\>\-|RP|\>D|\>R)?\s+((([\w\&\/]+\s)+)?\w+(\n|[^.])?))"

# A requirement block: an optional ``Needs: <n> ...`` lead-in followed by
# ``Select from:`` and a comma-separated list of course options.
reqs_pattern = r"((Needs\:)\s+([1-9]+)\s*.+(\n\s+)?)?(Select from\:)(((\s+[A-Z]+\s*)?(\,)?([0-9]+[A-Z]*)(\([A-Z0-9\s]+\))?(\,)?)+)"

# One option inside a requirement block: optional upper-case prefix (group 1),
# number with optional letter suffix (group 3; digits alone are group 4), and
# an optional ``TO <number>`` range end (group 7).
req_option_pattern = r"([A-Z]+)?\s*((([0-9]+)([A-Z])?)(\s*TO\s*([0-9]+))?)"
18
+
19
# Load the course catalog once, at import time. The path is relative to the
# current working directory, so the script must be launched from a directory
# where ``../course_data/data.json`` resolves.
# The encoding is given explicitly so the file is decoded as UTF-8 regardless
# of the platform's locale default.
with open("../course_data/data.json", "r", encoding="utf-8") as f:
    course_catalog = json.load(f)

# Report how many courses were loaded.
print(len(course_catalog))
24
+
25
class Response:
    """Outcome of a check: a boolean ``status`` plus a human-readable ``message``."""

    def __init__(self, status: bool, message: str):
        self.status = status
        self.message = message

    def to_dict(self):
        """Return the response as a plain ``{"status": ..., "message": ...}`` dict."""
        return {
            "status": self.status,
            "message": self.message,
        }

    def __dict__(self):
        """Return the response as a plain dict (kept for existing callers).

        Callers in this file invoke ``response.__dict__()``, so the callable
        is retained. Note that it shadows the built-in instance attribute of
        the same name: code that reads ``obj.__dict__`` expecting a mapping
        gets this bound method instead. Prefer ``to_dict()`` in new code.
        """
        return self.to_dict()

    def __str__(self):
        # ``__str__`` must return a string; printing and returning None makes
        # ``str(response)`` / ``print(response)`` raise TypeError.
        return str(self.to_dict())
38
+
39
def is_course_available(key):
    """Check whether ``key`` has at least one session that can still be joined.

    Looks ``key`` up in the module-level ``course_catalog`` and returns a
    ``Response``:

    * ``(False, "not found")`` when the key is not in the catalog;
    * ``(False, "no sessions available")`` when the course record has no
      ``"sessions"`` entry;
    * ``(False, "all sessions are closed or unavailable")`` when no session
      is both below capacity and has status ``"open"`` (case-insensitive);
    * ``(True, "session is available")`` otherwise.
    """
    if key not in course_catalog:
        return Response(False, "not found")

    course = course_catalog[key]
    if "sessions" not in course.keys():
        return Response(False, "no sessions available")

    # Every session is inspected in full (no short-circuit), so malformed
    # session records surface as errors wherever they sit in the list.
    joinable_flags = []
    for session in course["sessions"]:
        has_room = int(session["enrolled"]) < int(session["capacity"])
        accepting = session["status"].lower() == "open"
        joinable_flags.append(has_room and accepting)

    if any(joinable_flags):
        return Response(True, "session is available")
    return Response(False, "all sessions are closed or unavailable")
57
+
58
+
11
59
12
60
def extract_as_txt (filename ):
13
61
reader = PdfReader (f"pdf/{ filename } .pdf" )
@@ -22,7 +70,7 @@ def extract_as_txt(filename):
22
70
23
71
f .close ()
24
72
25
- def extract_section (text , pattern ):
73
+ def extract_section (pattern , text ):
26
74
s = list (re .finditer (pattern , text ))
27
75
matches = [x .start () for x in s ]
28
76
matches_shifted = matches [1 :] + [len (text )]
@@ -42,6 +90,56 @@ def add_entry(group_list):
42
90
}
43
91
entry_data .append (data )
44
92
93
def add_all_entries(text):
    """Record every course entry found in ``text``.

    Splits ``text`` with ``extract_section`` using the module-level
    ``entry_pattern`` and passes each match's capture groups, as a list, to
    ``add_entry``. The text slice that accompanies each match is not used.
    """
    for entry_match, _ in extract_section(entry_pattern, text):
        add_entry(list(entry_match.groups()))
99
+
100
def add_req(key):
    """Append a requirement record for course ``key`` to ``req_data``.

    The record always carries the course key, the current ``section_key`` and
    ``entry_title`` globals (whitespace-normalised) and the availability
    result from ``is_course_available``. When the course exists in
    ``course_catalog`` its catalog fields are merged in last, so a catalog
    field with the same name as one of the base keys overrides it.
    """
    in_catalog = key in course_catalog

    record = {
        "course": key,
        # split()/join collapses runs of whitespace into single spaces.
        "type": " ".join(str(section_key).split()),
        "subtype": " ".join(str(entry_title).split()),
        "availability": is_course_available(key).__dict__(),
    }
    if in_catalog:
        record.update(course_catalog[key])

    req_data.append(record)
117
+
118
def add_all_reqs(text):
    """Record the course options listed in every requirement block of ``text``.

    Each ``reqs_pattern`` block is split into options with
    ``req_option_pattern``. An option is a number with an optional upper-case
    prefix and an optional ``TO <number>`` range end. A prefix, once seen, is
    remembered and reused for later options that omit it (including options in
    later blocks). Matching courses are recorded through ``add_req``.
    """
    course_code = ""
    for _, reqs_str in extract_section(reqs_pattern, text):
        options = list(extract_section(req_option_pattern, reqs_str))
        # NOTE(review): the first option match is always skipped -- presumably
        # it is the count from the "Needs: <n>" lead-in; confirm for blocks
        # that have no "Needs:" part.
        for option, _ in options[1:]:
            prefix = option.group(1)
            base_suffix = option.group(3)  # digits plus optional letter
            first_number = int(option.group(4))
            range_end = option.group(7)

            if prefix is not None:
                course_code = prefix

            last_number = first_number if range_end is None else int(range_end)

            # Without a range the span below is empty and only the
            # unconditional add at the bottom runs.
            # NOTE(review): the upper bound is exclusive, so the number after
            # "TO" itself is never looked up -- confirm this is intended.
            for number in range(first_number, last_number):
                if number == first_number:
                    key = course_code + base_suffix
                else:
                    key = course_code + str(number)
                if key in course_catalog:
                    add_req(key)

            # NOTE(review): for a ranged option whose first course is in the
            # catalog, this records that course a second time -- confirm.
            add_req(course_code + base_suffix)
142
+
45
143
if __name__ == "__main__" :
46
144
audit_name = sys .argv [1 ]
47
145
extract_as_txt (audit_name )
@@ -51,49 +149,37 @@ def add_entry(group_list):
51
149
f .close ()
52
150
53
151
entry_data = []
54
- section_pattern = r"(NO|OK|\+\-)\s+(((\s+[A-Z0-9\:\&\/\']+)+)(\s+\*+(\s+(\s+([A-Z0-9\:\&\/\']+))+)?)?)"
55
- subsection_pattern = r"(\s{3,}|\n\s*)(\-|\+|ip\s*(\-|\+))(\s+R\s)?(\s+[0-9]\))?\s*(([A-Z0-9(\/)\:\']([A-Za-z0-9\/\:\'\-]+\s)*)([A-Za-z0-9\/\:\-\']+))(\s{3,}|\n\s*)"
56
- entry_pattern = r"(([a-zA-Z0-9]{4})\s+([a-zA-z]+\s?[0-9]+([a-zA-Z]?)+)\s+([0-9]+\.[0-9]+)\s+([A-Za-z\-\+\/]+)\s+(\>S|\>X|\>\-|RP|\>D|\>R)?\s+((([\w\&\/]+\s)+)?\w+(\n|[^.])?))"
57
- sections = extract_section (text , section_pattern )
58
- validation_length = len ([_ for _ in extract_section (text , entry_pattern )])
152
+ req_data = []
153
+
154
+ sections = extract_section (section_pattern , text )
155
+ validation_length = len ([_ for _ in extract_section (entry_pattern , text )])
59
156
60
157
for section_obj , section_str in sections :
61
- subsections = extract_section (section_str , subsection_pattern )
158
+ subsections = list ( extract_section (subsection_pattern , section_str ) )
62
159
section_key = section_obj .groups ()[2 ]
63
- has_subsection = False
160
+
161
+ if len (subsections ) == 0 :
162
+ entry_title = section_key
163
+ add_all_entries (section_str )
164
+ add_all_reqs (section_str )
165
+ continue
64
166
65
167
for subsection_obj , subsection_str in subsections :
66
168
has_subsection = True
67
169
entry_title = subsection_obj .groups ()[5 ]
68
-
69
- entries = extract_section (subsection_str , entry_pattern )
70
- for entry_obj , entry_str in entries :
71
- group_list = list (entry_obj .groups ())
72
- add_entry (group_list )
73
-
74
- if has_subsection :
75
- continue
76
-
77
- entries = extract_section (section_str , entry_pattern )
78
- entry_title = section_key
79
- for entry_obj , entry_str in entries :
80
- group_list = list (entry_obj .groups ())
81
- add_entry (group_list )
170
+ add_all_entries (subsection_str )
171
+ add_all_reqs (subsection_str )
82
172
83
- with open (f"json/{ audit_name } .json" , "w+" ) as outfile :
173
+ with open (f"json/{ audit_name } -past .json" , "w+" ) as outfile :
84
174
json .dump (entry_data , outfile , indent = 4 )
85
175
176
+ with open (f"json/{ audit_name } .json" , "w+" ) as outfile :
177
+ json .dump (req_data , outfile , indent = 4 )
178
+
86
179
if len (entry_data ) != validation_length :
87
180
print (f"Warning: Failed to parse { validation_length - len (entry_data )} entries" )
88
181
print ("\t Total entries:" , len (entry_data ), "Expected entries:" , validation_length , "\n " )
89
182
else :
90
183
print ("Parsing complete! " , end = "" )
91
184
92
- print ("Saved as" , f"json/{ audit_name } .json" )
93
-
94
-
95
- # (NO|OK|\+\-)\s+(((\s+[A-Z0-9\:\&\/\']+)+)(\s+\*+(\s+(\s+([A-Z0-9\:\&\/\']+))+)?)?)
96
- # ((NO|OK|\+\-)\s+(((\s+[A-Z0-9\:\&\/\']+)+)(\s+\*+(\s+(\s+([A-Z0-9\:\&\/\']+))+)?)?)|(\n\s|\s\s|\n)(ip\s)?(\-|\+)\s{2,}([0-9]+\))?\s?([A-Za-z\/\-\:0-9]+\s(\-\s)?)+)\s(^(?:(?!((NO|OK|\+\-)\s+([A-Za-z0-9]+( [A-Za-z0-9]+)+)\s+\*+|(\n\s|\s\s|\n)(ip\s)?(\-|\+)\s{2,}([0-9]+\))?\s?([A-Za-z\/\-\:0-9]+\s(\-\s)?)+)).)*$\n){0,}
97
- # ((NO|OK|\+\-)\s+([A-Za-z0-9]+( [A-Za-z0-9]+)+)\s+\*+|(\n\s|\s\s|\n)(ip\s)?(\-|\+)\s{2,}([0-9]+\))?\s?([A-Za-z\/\-\:0-9]+\s(\-\s)?)+)\s(^(?:(?!((NO|OK|\+\-)\s+([A-Za-z0-9]+( [A-Za-z0-9]+)+)\s+\*+|(\n\s|\s\s|\n)(ip\s)?(\-|\+)\s{2,}([0-9]+\))?\s?([A-Za-z\/\-\:0-9]+\s(\-\s)?)+)).)*$\n){0,}
98
- # (\s{3,}|\n\s*)(\-|\+|ip\s*(\-|\+))(\s+R\s)?(\s+[0-9]\))?\s*([A-Za-z0-9]+)(\s[A-Za-z0-9]+)*(\s{3,}|\n\s*)
99
- # (\s{3,}|\n\s*)(\-|\+|ip\s*(\-|\+))(\s+R\s)?(\s+[0-9]\))?\s*(([A-Z0-9(\/)\:\']([A-Za-z0-9\/\:\'\-]+\s)*)([A-Za-z0-9\/\:\-\']+))(\s{3,}|\n\s*)
185
+ print ("Saved as" , f"json/{ audit_name } .json" )
0 commit comments