Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/UTDNebula/api-tools into…
Browse files Browse the repository at this point in the history
… develop
  • Loading branch information
jpahm committed Jan 18, 2024
2 parents 2de7ced + f8b8373 commit e06596e
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 60 deletions.
13 changes: 7 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@ go 1.19

require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/UTDNebula/nebula-api/api v0.0.0-20231101170542-3744898c1b0e
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9
github.com/chromedp/chromedp v0.8.7
github.com/UTDNebula/nebula-api/api v0.0.0-20231204040332-adccdc08b203
github.com/chromedp/cdproto v0.0.0-20231101223124-24f5925b5980
github.com/chromedp/chromedp v0.9.3
github.com/joho/godotenv v1.5.1
go.mongodb.org/mongo-driver v1.12.1
go.mongodb.org/mongo-driver v1.13.0
)

require (
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/chromedp/sysutil v1.0.0 // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.1.0 // indirect
github.com/gobwas/ws v1.3.0 // indirect
github.com/google/go-cmp v0.5.5 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/sys v0.14.0 // indirect
)
31 changes: 18 additions & 13 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/UTDNebula/nebula-api/api v0.0.0-20231101170542-3744898c1b0e h1:HHkRFB02rzU8FPhpUiy+AvUCYApsx369OGtUaK0zy5Y=
github.com/UTDNebula/nebula-api/api v0.0.0-20231101170542-3744898c1b0e/go.mod h1:/qhwfZIy/fvEDMklOqynJ34K2H2Ml40bcu9A/EB2AfU=
github.com/UTDNebula/nebula-api/api v0.0.0-20231204040332-adccdc08b203 h1:Q2LpqHQs7Ghf+qfFjxqwDQRQSzUXiTI6QC37j8Jb2QI=
github.com/UTDNebula/nebula-api/api v0.0.0-20231204040332-adccdc08b203/go.mod h1:/qhwfZIy/fvEDMklOqynJ34K2H2Ml40bcu9A/EB2AfU=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9 h1:wMSvdj3BswqfQOXp2R1bJOAE7xIQLt2dlMQDMf836VY=
github.com/chromedp/cdproto v0.0.0-20230220211738-2b1ec77315c9/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/chromedp v0.8.7 h1:dYOYc5ynTBzwSLOi+1IfgHwPr8r2BqV48l/RC+3OuJ0=
github.com/chromedp/chromedp v0.8.7/go.mod h1:iL+ywnwk3eG3EVXV1ackXBMNzdEh3Ye/KHvQkq1KRKU=
github.com/chromedp/cdproto v0.0.0-20231011050154-1d073bb38998/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/cdproto v0.0.0-20231101223124-24f5925b5980 h1:/nOO3ctHFqUmUwK5EdItjTCRQQCTcxWU7aJgyjFGtEc=
github.com/chromedp/cdproto v0.0.0-20231101223124-24f5925b5980/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/chromedp v0.9.3 h1:Wq58e0dZOdHsxaj9Owmfcf+ibtpYN1N0FWVbaxa/esg=
github.com/chromedp/chromedp v0.9.3/go.mod h1:NipeUkUcuzIdFbBP8eNNvl9upcceOfWzoJn6cRe4ksA=
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
Expand All @@ -16,27 +17,31 @@ github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.1.0 h1:7RFti/xnNkMJnrK7D1yQ/iCIB5OrrY/54/H930kIbHA=
github.com/gobwas/ws v1.1.0/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0=
github.com/gobwas/ws v1.3.0 h1:sbeU3Y4Qzlb+MOzIe6mQGf7QR4Hkv6ZD0qhGkBFL2O0=
github.com/gobwas/ws v1.3.0/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.mongodb.org/mongo-driver v1.12.1 h1:nLkghSU8fQNaK7oUmDhQFsnrtcoNy7Z6LVFKsEecqgE=
go.mongodb.org/mongo-driver v1.12.1/go.mod h1:/rGBTebI3XYboVmgz+Wv3Bcbl3aD0QF9zl6kDDw18rQ=
go.mongodb.org/mongo-driver v1.13.0 h1:67DgFFjYOCMWdtTEmKFpV3ffWlFnh+CYZ8ZS/tXWUfY=
go.mongodb.org/mongo-driver v1.13.0/go.mod h1:/rGBTebI3XYboVmgz+Wv3Bcbl3aD0QF9zl6kDDw18rQ=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
Expand All @@ -53,14 +58,14 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201207223542-d4d67f95c62d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
Expand Down
2 changes: 1 addition & 1 deletion parser/courseParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func parseCourse(courseNum string, session schema.AcademicSession, rowInfo map[s

course = &schema.Course{}

course.Id = primitive.NewObjectID()
course.Id = schema.IdWrapper(primitive.NewObjectID().Hex())
course.Course_number = idMatches[2]
course.Subject_prefix = idMatches[1]
course.Title = rowInfo["Course Title:"]
Expand Down
9 changes: 4 additions & 5 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,19 @@ import (

"github.com/PuerkitoBio/goquery"
"github.com/UTDNebula/nebula-api/api/schema"
"go.mongodb.org/mongo-driver/bson/primitive"
)

// Main dictionaries for mapping unique keys to the actual data
var Sections = make(map[primitive.ObjectID]*schema.Section)
var Sections = make(map[schema.IdWrapper]*schema.Section)
var Courses = make(map[string]*schema.Course)
var Professors = make(map[string]*schema.Professor)

// Auxiliary dictionaries mapping the generated IDs back to the keys used in the above maps; used for validation purposes
var CourseIDMap = make(map[primitive.ObjectID]string)
var ProfessorIDMap = make(map[primitive.ObjectID]string)
var CourseIDMap = make(map[schema.IdWrapper]string)
var ProfessorIDMap = make(map[schema.IdWrapper]string)

// Requisite parser closures associated with courses
var ReqParsers = make(map[primitive.ObjectID]func())
var ReqParsers = make(map[schema.IdWrapper]func())

// Grade mappings for section grade distributions, mapping is MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION
var GradeMap map[string]map[string][]int
Expand Down
21 changes: 13 additions & 8 deletions parser/professorParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,23 @@ import (
"go.mongodb.org/mongo-driver/bson/primitive"
)

func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]string, classInfo map[string]string) []primitive.ObjectID {
func parseProfessors(sectionId schema.IdWrapper, rowInfo map[string]string, classInfo map[string]string) []schema.IdWrapper {
professorText := rowInfo["Instructor(s):"]
professorMatches := personRegexp.FindAllStringSubmatch(professorText, -1)
var profRefs []primitive.ObjectID = make([]primitive.ObjectID, 0, len(professorMatches))
var profRefs []schema.IdWrapper = make([]schema.IdWrapper, 0, len(professorMatches))
for _, match := range professorMatches {

nameStr := match[1]
nameStr := trimWhitespace(match[1])

Check failure on line 16 in parser/professorParser.go

View workflow job for this annotation

GitHub Actions / build_ubuntu

undefined: trimWhitespace

Check failure on line 16 in parser/professorParser.go

View workflow job for this annotation

GitHub Actions / build_windows

undefined: trimWhitespace
names := strings.Split(nameStr, " ")

firstName := names[0]
firstName := strings.Join(names[:len(names)-1], " ")
lastName := names[len(names)-1]

// Skip entries whose first or last name is blank; such entries do show up in the instructor text
if firstName == "" || lastName == "" {
continue
}

profKey := firstName + lastName

prof, profExists := Professors[profKey]
Expand All @@ -29,12 +34,12 @@ func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]string, cl
}

prof = &schema.Professor{}
prof.Id = primitive.NewObjectID()
prof.Id = schema.IdWrapper(primitive.NewObjectID().Hex())
prof.First_name = firstName
prof.Last_name = lastName
prof.Titles = []string{match[2]}
prof.Email = match[3]
prof.Sections = []primitive.ObjectID{sectionId}
prof.Titles = []string{trimWhitespace(match[2])}

Check failure on line 40 in parser/professorParser.go

View workflow job for this annotation

GitHub Actions / build_ubuntu

undefined: trimWhitespace

Check failure on line 40 in parser/professorParser.go

View workflow job for this annotation

GitHub Actions / build_windows

undefined: trimWhitespace
prof.Email = trimWhitespace(match[3])

Check failure on line 41 in parser/professorParser.go

View workflow job for this annotation

GitHub Actions / build_ubuntu

undefined: trimWhitespace

Check failure on line 41 in parser/professorParser.go

View workflow job for this annotation

GitHub Actions / build_windows

undefined: trimWhitespace
prof.Sections = []schema.IdWrapper{sectionId}
profRefs = append(profRefs, prof.Id)
Professors[profKey] = prof
ProfessorIDMap[prof.Id] = profKey
Expand Down
26 changes: 9 additions & 17 deletions parser/sectionParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (

var sectionPrefixRegexp *regexp.Regexp = regexp.MustCompile(`^(?i)[A-Z]{2,4}[0-9V]{4}\.([0-9A-z]+)`)
var coreRegexp *regexp.Regexp = regexp.MustCompile(`[0-9]{3}`)
var personRegexp *regexp.Regexp = regexp.MustCompile(`\s*([\w ]+)\s+・\s+([A-z ]+)\s+・\s+([\w@.]+)`)
var personRegexp *regexp.Regexp = regexp.MustCompile(`(.+)・(.+)・(.+)`)

func parseSection(courseRef *schema.Course, classNum string, syllabusURI string, session schema.AcademicSession, rowInfo map[string]string, classInfo map[string]string) {
// Get subject prefix and course number by doing a regexp match on the section id
Expand All @@ -21,7 +21,7 @@ func parseSection(courseRef *schema.Course, classNum string, syllabusURI string,

section := &schema.Section{}

section.Id = primitive.NewObjectID()
section.Id = schema.IdWrapper(primitive.NewObjectID().Hex())
section.Section_number = idMatches[1]
section.Course_reference = courseRef.Id

Expand All @@ -38,12 +38,12 @@ func parseSection(courseRef *schema.Course, classNum string, syllabusURI string,
section.Teaching_assistants = make([]schema.Assistant, 0, len(assistantMatches))
for _, match := range assistantMatches {
assistant := schema.Assistant{}
nameStr := match[1]
nameStr := trimWhitespace(match[1])

Check failure on line 41 in parser/sectionParser.go

View workflow job for this annotation

GitHub Actions / build_ubuntu

undefined: trimWhitespace

Check failure on line 41 in parser/sectionParser.go

View workflow job for this annotation

GitHub Actions / build_windows

undefined: trimWhitespace
names := strings.Split(nameStr, " ")
assistant.First_name = names[0]
assistant.First_name = strings.Join(names[:len(names)-1], " ")
assistant.Last_name = names[len(names)-1]
assistant.Role = match[2]
assistant.Email = match[3]
assistant.Role = trimWhitespace(match[2])

Check failure on line 45 in parser/sectionParser.go

View workflow job for this annotation

GitHub Actions / build_ubuntu

undefined: trimWhitespace

Check failure on line 45 in parser/sectionParser.go

View workflow job for this annotation

GitHub Actions / build_windows

undefined: trimWhitespace
assistant.Email = trimWhitespace(match[3])

Check failure on line 46 in parser/sectionParser.go

View workflow job for this annotation

GitHub Actions / build_ubuntu

undefined: trimWhitespace

Check failure on line 46 in parser/sectionParser.go

View workflow job for this annotation

GitHub Actions / build_windows

undefined: trimWhitespace
section.Teaching_assistants = append(section.Teaching_assistants, assistant)
}

Expand Down Expand Up @@ -130,17 +130,9 @@ func getMeetings(rowInfo map[string]string, classInfo map[string]string) []schem

meeting.Meeting_days = strings.Split(match[3], ", ")

startTime, err := time.ParseInLocation("3:04pm", match[4], timeLocation)
if err != nil {
panic(err)
}
meeting.Start_time = startTime

endTime, err := time.ParseInLocation("3:04pm", match[5], timeLocation)
if err != nil {
panic(err)
}
meeting.End_time = endTime
// Store the matched time strings as-is; parsing them into time objects adds unnecessary extra data
meeting.Start_time = match[4]
meeting.End_time = match[5]

// Only add location data if it's available
if len(match) > 6 {
Expand Down
2 changes: 1 addition & 1 deletion parser/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func WriteJSON(filepath string, data interface{}) error {
defer fptr.Close()
encoder := json.NewEncoder(fptr)
encoder.SetIndent("", "\t")
encoder.Encode(GetMapValues(Courses))
encoder.Encode(data)
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion scrapers/coursebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
courseBuilder.Write(buf.Bytes())
}
// Find all section IDs in returned data
sectionRegexp := regexp.MustCompile(fmt.Sprintf("View details for section (%s[0-9v]{4}\\.[0-9a-z]{3}\\.[0-9]{2}[suf])", coursePrefix[3:]))
sectionRegexp := regexp.MustCompile(fmt.Sprintf(`View details for section (%s[0-9v]{4}\.\w+\.[0-9]{2}[suf])`, coursePrefix[3:]))
smatches := sectionRegexp.FindAllStringSubmatch(courseBuilder.String(), -1)
sectionIDs := make([]string, 0, len(smatches))
for _, matchSet := range smatches {
Expand Down
2 changes: 1 addition & 1 deletion scrapers/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ func ScrapeEvents(outDir string) {
log.Printf("Scraped contact phone info: %s\n", contactInformationPhone)

events = append(events, schema.Event{
Id: primitive.NewObjectID(),
Id: schema.IdWrapper(primitive.NewObjectID().Hex()),
Summary: summary,
Location: location,
StartTime: dateTimeStart,
Expand Down
14 changes: 9 additions & 5 deletions scrapers/organizations.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func processCsv(ctx context.Context, inputPath string, storageFilePath string) e
encoder := json.NewEncoder(bufio.NewWriter(storageFile))
encoder.SetIndent("", "\t")

var _ []*schema.Organization
var orgs []*schema.Organization
// process each row of csv
for i := 1; true; i++ {
entry, err := csvReader.Read()
Expand All @@ -189,9 +189,13 @@ func processCsv(ctx context.Context, inputPath string, storageFilePath string) e
if err != nil {
return err
}
if err := encoder.Encode(org); err != nil {
return err
}

orgs = append(orgs, org)
}

// Write JSON to file
if err = encoder.Encode(orgs); err != nil {
return err
}

if err := csvFile.Close(); err != nil {
Expand All @@ -218,7 +222,7 @@ func parseCsvRecord(ctx context.Context, entry []string) (*schema.Organization,
log.Printf("Error retrieving image for %s: %v\n", entry[0], err)
}
return &schema.Organization{
Id: primitive.NewObjectID(),
Id: schema.IdWrapper(primitive.NewObjectID().Hex()),
Title: entry[0],
Categories: parseCategories(entry[1]),
Description: entry[2],
Expand Down
4 changes: 2 additions & 2 deletions scrapers/profiles.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ func ScrapeProfiles(outDir string) {
log.Printf("Parsed list! #: %s, Office: %v\n\n", phoneNumber, office)

professors = append(professors, schema.Professor{
Id: primitive.NewObjectID(),
Id: schema.IdWrapper(primitive.NewObjectID().Hex()),
First_name: firstName,
Last_name: lastName,
Titles: titles,
Expand All @@ -280,7 +280,7 @@ func ScrapeProfiles(outDir string) {
Profile_uri: link,
Image_uri: imageUri,
Office_hours: []schema.Meeting{},
Sections: []primitive.ObjectID{},
Sections: []schema.IdWrapper{},
})

log.Printf("Scraped profile for %s %s!\n\n", firstName, lastName)
Expand Down

0 comments on commit e06596e

Please sign in to comment.