File tree Expand file tree Collapse file tree 1 file changed +2
-3
lines changed Expand file tree Collapse file tree 1 file changed +2
-3
lines changed Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ def extract_as_txt(filename):
17
17
for i in range (number_of_pages ):
18
18
page = reader .pages [i ]
19
19
text = page .extract_text (extraction_mode = "layout" , layout_mode_space_vertically = False )
20
- text = re .sub (re .compile ("\s+Page\s[0-9]+\sof\s[0-9]+(\s+)?" ), "" , text )
20
+ text = re .sub (re .compile ("\s+Page\s[0-9]+\sof\s[0-9]+(\s+)?" ), "\n " , text )
21
21
f .write (text )
22
22
23
23
f .close ()
@@ -51,9 +51,8 @@ def add_entry(group_list):
51
51
f .close ()
52
52
53
53
entry_data = []
54
- text = "\n " .join (re .split (re .compile ("\s+Page\s[0-9]+\sof\s[0-9]+\s+" ), text ))
55
54
section_pattern = r"(NO|OK|\+\-)\s+(((\s+[A-Z0-9\:\&\/\']+)+)(\s+\*+(\s+(\s+([A-Z0-9\:\&\/\']+))+)?)?)"
56
- subsection_pattern = r"(\s{3,}|\n\s*|GPA )(\-|\+|ip\s*(\-|\+))(\s+R\s)?(\s+[0-9]\))?\s*(([A-Z0-9(\/)\:\']([A-Za-z0-9\/\:\'\-]+\s)*)([A-Za-z0-9\/\:\-\']+))(\s{3,}|\n\s*)"
55
+ subsection_pattern = r"(\s{3,}|\n\s*)(\-|\+|ip\s*(\-|\+))(\s+R\s)?(\s+[0-9]\))?\s*(([A-Z0-9(\/)\:\']([A-Za-z0-9\/\:\'\-]+\s)*)([A-Za-z0-9\/\:\-\']+))(\s{3,}|\n\s*)"
57
56
entry_pattern = r"(([a-zA-Z0-9]{4})\s+([a-zA-z]+\s?[0-9]+([a-zA-Z]?)+)\s+([0-9]+\.[0-9]+)\s+([A-Za-z\-\+\/]+)\s+(\>S|\>X|\>\-|RP|\>D|\>R)?\s+((([\w\&\/]+\s)+)?\w+(\n|[^.])?))"
58
57
sections = extract_section (text , section_pattern )
59
58
validation_length = len ([_ for _ in extract_section (text , entry_pattern )])
You can’t perform that action at this time.
0 commit comments