Skip to content

Commit 66abdf9

Browse files
author
Jim Tuttle
committed
Added support for graduate students and hospital house staff. Now using NetID as unique key.
1 parent 1b7246b commit 66abdf9

File tree

1 file changed

+100
-94
lines changed

1 file changed

+100
-94
lines changed

hrDataFeeder.py

Lines changed: 100 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,36 @@
1313
from sys import exit
1414
from djangoutil.xmlrpc import getServerProxy
1515
from xml.sax.saxutils import escape
16-
16+
from ConfigParser import SafeConfigParser
1717
import codecs
1818
import io
19-
19+
from django.conf import settings
20+
from djangoutil import config
21+
settings.configure(config)
2022
# encoding=utf8
2123
import sys
2224
reload(sys)
2325
sys.setdefaultencoding('utf8')
24-
25-
26-
from django.conf import settings
27-
from djangoutil import config
28-
settings.configure(config)
29-
30-
3126
# set to UTF-8 to capture diacritics
3227
environ['NLS_LANG']= 'AMERICAN_AMERICA.AL32UTF8'
3328

3429

30+
31+
# database configuration
32+
usedb = 'test' # choose database: dev, test, or prod
33+
config = SafeConfigParser()
34+
config.read(join(getcwd(), 'fdr.config')) # read config file to gather parameters
35+
dbhost = config.get(usedb, 'dbhost')
36+
dbport = config.get(usedb, 'dbport')
37+
dbsid = config.get(usedb, 'dbsid')
38+
dbuser = config.get(usedb, 'dbuser')
39+
dbpassword = config.get(usedb, 'dbpassword')
40+
3541
useldapforemail = False # LDAP is slow and hasn't returned a significant number of emails. If False, use netid@duke.edu instead.
3642
sd_file = join(getcwd(), 'libsymel.dat') # Nightly export of Service Directory data
3743
xmlfile = join(getcwd(), 'people.xml') # Output file for Symplectic Elements consumption
3844
affiliationsfile = join(getcwd(), 'affiliations.txt') # Output file for unique affiliations to populate Elements Auto Groups
3945

40-
# set text encoding to UTF-8 to capture diacritics and whatnot
41-
environ['NLS_LANG']= 'AMERICAN_AMERICA.AL32UTF8'
42-
43-
# FDR database specifics
44-
dbhost = 'fdrprd-db.oit.duke.edu'
45-
dbport = '1637'
46-
dbsid = 'FDRPRD'
47-
dbuser = 'libacct'
48-
dbpassword = 'uanbahd10'
49-
5046
# instantiate and configure logger
5147
logfile = join(getcwd(), 'hrDataFeeder.log')
5248
logger = logging.getLogger('fdrlogger')
@@ -66,70 +62,80 @@ def getResults(ora, sql):
6662

6763
# Take list of dictionaries and build XML elements. Return string.
6864
def buildXml(list):
69-
sequence_dict = {1:'Secondary', 2:'Tertiary', 3:'Quaternary', 4:'Quinary', 5:u'Senary', 6:u'Septenary', 7:u'Octonary', 8:u'Nonary', 9:u'Denary'}
70-
xml = u''
65+
sequence_dict = {1:'Secondary', 2:'Tertiary', 3:'Quaternary', 4:'Quinary', 5:'Senary', 6:'Septenary', 7:'Octonary', 8:'Nonary', 9:'Denary'}
66+
xml = ''
7167
for record in list:
72-
xml += u'\t\t<person>\n'
73-
xml += u'\t\t\t<Lastname>%s</Lastname>\n' % (record[u'surname'])
74-
xml += u'\t\t\t<Firstname>%s</Firstname>\n' % (record[u'forename'])
68+
xml += '\t\t<person>\n'
69+
xml += '\t\t\t<Lastname>%s</Lastname>\n' % (record['surname'])
70+
xml += '\t\t\t<Firstname>%s</Firstname>\n' % (record['forename'])
7571
try:
76-
xml += u'\t\t\t<Middlename>%s</Middlename>\n' % (record[u'middlename'])
72+
xml += '\t\t\t<Middlename>%s</Middlename>\n' % (record['middlename'])
7773
except:
7874
pass
79-
xml += u'\t\t\t<Email>%s</Email>\n' % (record['email']) # removing angle brackets in some email fields
80-
xml += u'\t\t\t<Proprietary_ID>%s</Proprietary_ID>\n' % (record[u'duid'])
81-
xml += u'\t\t\t<Username>%s</Username>\n' % (record[u'netid'])
82-
xml += u'\t\t\t<PrimaryGroupDescriptor>%s</PrimaryGroupDescriptor>\n' % (escape(record[u'primary']))
75+
xml += '\t\t\t<Email>%s</Email>\n' % (record['email']) # removing angle brackets in some email fields
76+
xml += '\t\t\t<Proprietary_ID>%s</Proprietary_ID>\n' % (record['duid'])
77+
xml += '\t\t\t<Username>%s</Username>\n' % (record['netid'])
78+
xml += '\t\t\t<PrimaryGroupDescriptor>%s</PrimaryGroupDescriptor>\n' % (escape(record['primary']))
8379
# this must change in response to addition of school
8480
if 'secondary' in record:
85-
if len(record[u'secondary']) > 0:
81+
if len(record['secondary']) > 0:
8682
i = 1
87-
for appointment in record[u'secondary']:
88-
xml += u'\t\t\t<%sGroupDescriptor>%s</%sGroupDescriptor>\n' % (sequence_dict[i], escape(appointment.strip()), sequence_dict[i])
83+
for appointment in record['secondary']:
84+
xml += '\t\t\t<%sGroupDescriptor>%s</%sGroupDescriptor>\n' % (sequence_dict[i], escape(appointment.strip()), sequence_dict[i])
8985
i += 1
90-
xml += u'\t\t\t<IsAcademic>%s</IsAcademic>\n' % (record[u'academic'])
91-
xml += u'\t\t\t<LoginAllowed>%s</LoginAllowed>\n' % (record[u'login'])
92-
xml += u'\t\t\t<AuthenticatingAuthority>%s</AuthenticatingAuthority>\n' % (record[u'authority'])
93-
xml += u'\t\t</person>\n'
86+
xml += '\t\t\t<IsAcademic>%s</IsAcademic>\n' % (record['academic'])
87+
xml += '\t\t\t<LoginAllowed>%s</LoginAllowed>\n' % (record['login'])
88+
xml += '\t\t\t<AuthenticatingAuthority>%s</AuthenticatingAuthority>\n' % (record['authority'])
89+
xml += '\t\t</person>\n'
9490
return xml
9591

9692

9793
# Build list of dictionaries of FDR people. Also return list of Duke Unique IDs.
9894
def buildFdrDict(data, rpcserver, sd_dict_list):
95+
print 'buildFdrDict'
9996
fdr_dict_list = []
100-
duid_list = []
97+
# CHANGE THIS. DROP FDR RECORD WITHOUT NETID, USE NETID as KEY
98+
netid_list = []
10199
missing_fdr_email = 0
102100
missing_email_found_sd = 0
103101
for record in data:
104102
drop_record = False
105103
fdr_dict = {}
106104
try: # Confusing. FDR forced their names on us. Their PRIMARY_SCHOOL is our primary group, all other groups are secondary for us.
107-
duid, netid, salutation, surname, forename, middlename, lsurname, lforename, lmiddlename, email, primary, school, secondary = record
105+
duid, netid, salutation, surname, forename, middlename, lsurname, lforename, lmiddlename, email, primary, school, secondary, primary_affiliation = record
108106
except ValueError:
109107
logmessage = 'Database view has changed.'
110108
logger.critical(logmessage)
111109
exit()
112110
if not netid: # Some people records do not contain netid. Look in SD file. If not there, log and discard person.
113-
print 'missing netid for ' + duid + ' ' + forename + ' ' + surname
114-
for person in sd_dict_list: # Look through SD records
115-
if duid == person['duid']: # If DUID matches...
116-
print person
117-
netid = person['netid'] # Assign SD netid to person
118-
logmessage = "Found FDR person %s missing netid." % (duid)
119-
logger.info(logmessage)
120-
print logmessage
121-
break
122-
else: # If also no netid in SD, log and set flag to drop this record.
123-
logmessage = "Person %s missing netid in FDR and SD." % (duid)
124-
logger.critical(logmessage)
125-
print logmessage
126-
drop_record = True
111+
logmessage = 'Record dropped - No NetID in FDR. %s %s, %s' % (forename, surname, duid)
112+
logger.critical(logmessage)
113+
print logmessage
114+
drop_record = True
115+
continue
116+
else:
117+
pass
118+
119+
# for person in sd_dict_list: # Look through SD records
120+
# if duid == person['duid']: # If DUID matches...
121+
# print person
122+
# netid = person['netid'] # Assign SD netid to person
123+
# logmessage = "Found FDR person %s missing netid." % (duid)
124+
# logger.info(logmessage)
125+
# print logmessage
126+
# break
127+
# else: # If also no netid in SD, log and set flag to drop this record.
128+
# logmessage = "Person %s missing netid in FDR and SD." % (duid)
129+
# logger.critical(logmessage)
130+
# print logmessage
131+
# drop_record = True
132+
127133
if surname: # If professional name set, use that. Otherwise fall back to legal name.
128134
fdr_dict['surname'] = surname
129135
fdr_dict['forename'] = forename
130136
if middlename: # Many records do not contain middle name.
131137
fdr_dict['middlename'] = middlename
132-
else: # Legal name block.
138+
else: # Legal name block
133139
fdr_dict['surname'] = lsurname
134140
fdr_dict['forename'] = lforename
135141
if lmiddlename:
@@ -142,9 +148,8 @@ def buildFdrDict(data, rpcserver, sd_dict_list):
142148
email = person['email'] # Assign SD email to person
143149
logmessage = "FDR person %s missing email found in Service Directory." % (duid)
144150
missing_email_found_sd += 1
145-
print logmessage
151+
#print logmessage
146152
#logger.info(logmessage)
147-
print logmessage
148153
break
149154
else:
150155
email = person['email']
@@ -171,7 +176,7 @@ def buildFdrDict(data, rpcserver, sd_dict_list):
171176
fdr_dict['academic'] = 'Y'
172177
fdr_dict['login'] = 'Y'
173178
fdr_dict['authority'] = 'Shibboleth'
174-
duid_list.append(duid)
179+
netid_list.append(netid)
175180
if not drop_record:
176181
fdr_dict_list.append(fdr_dict)
177182
else: # Discard this record and log.
@@ -181,7 +186,7 @@ def buildFdrDict(data, rpcserver, sd_dict_list):
181186
logmessage = '%s FDR records without email addresses' % (missing_fdr_email)
182187
logger.info(logmessage)
183188
print '%s people missing FDR email found in SD' % (missing_email_found_sd)
184-
return fdr_dict_list, duid_list
189+
return fdr_dict_list, netid_list
185190

186191

187192
# Build list of dictionaries of service directory entries after deduplicating people from FDR
@@ -190,39 +195,41 @@ def buildSdDict(sd_file):
190195
duplicates = 0
191196
sd_missing_email = 0
192197
sd = open(sd_file, 'r')
198+
print '1'
193199
for line in sd:
194200
sd_dict = {}
195201
duid , netid, surname, forename, email, status = line.split('|')
196-
sd_dict[u'duid'] = duid
197-
sd_dict[u'netid'] = netid
198-
sd_dict[u'surname'] = surname
199-
sd_dict[u'forename'] = forename
200-
sd_dict[u'primary'] = status.strip() # Remove line break
201-
sd_dict[u'academic'] = u'N'
202-
sd_dict[u'login'] = u'Y'
203-
sd_dict[u'authority'] = u'Shibboleth'
202+
sd_dict['duid'] = duid
203+
sd_dict['netid'] = netid
204+
sd_dict['surname'] = surname
205+
sd_dict['forename'] = forename
206+
sd_dict['primary'] = status.strip() # Remove line break
207+
sd_dict['academic'] = 'N'
208+
sd_dict['login'] = 'Y'
209+
sd_dict['authority'] = 'Shibboleth'
204210
if email:
205211
email = email.translate(None, "<>") # Remove angle brackets present in some email fields
206-
sd_dict[u'email'] = email
212+
sd_dict['email'] = email
207213
else:
208-
sd_dict[u'email'] = netid + u'@duke.edu'
214+
sd_dict['email'] = netid + '@duke.edu'
209215
sd_missing_email += 1
210216
sd_dict_list.append(sd_dict)
211217
sd.close()
212-
logmessage = u'Found %s Service Directory records.' % (len(sd_dict_list) + duplicates)
218+
logmessage = 'Found %s Service Directory records.' % (len(sd_dict_list) + duplicates)
213219
logger.info(logmessage)
214-
logmessage = u'%s Service Directory records without email addresses' % (sd_missing_email)
220+
logmessage = '%s Service Directory records without email addresses' % (sd_missing_email)
215221
logger.info(logmessage)
216222
#logmessage = '%s Service Directory records were duplicates.' % (duplicates)
217223
#logger.info(logmessage)
224+
print 'testing" return buildDdDict'
218225
return sd_dict_list
219226

220227
# Deduplicate the SD people to prevent creating multiple accounts as some will appear in FDR data.
221-
def dedupeSdDictList(sd_dict_list, duid_list):
228+
def dedupeSdDictList(sd_dict_list, netid_list):
222229
duplicates = 0
223230
sd_dict_list_dedupe = []
224231
for record in sd_dict_list:
225-
if record['duid'] not in duid_list: # Deduplicate these records against FDR records.
232+
if record['netid'] not in netid_list: # Deduplicate these records against FDR records.
226233
sd_dict_list_dedupe.append(record)
227234
duplicates += 1
228235
logmessage = "Found %s Service record duplicates." % (duplicates)
@@ -235,14 +242,13 @@ def getUniqueAffiliations(fdr_dict_list):
235242
unique_affiliations_list = []
236243
for dict in fdr_dict_list:
237244
if 'secondary' in dict:
238-
for affiliation in dict[u'secondary']:
245+
for affiliation in dict['secondary']:
239246
if affiliation not in unique_affiliations_list:
240247
unique_affiliations_list.append(affiliation)
241248
return unique_affiliations_list
242249

243250

244251

245-
246252
if __name__=='__main__':
247253
try:
248254
logmessage = "Starting update." # Begin logging
@@ -254,38 +260,36 @@ def getUniqueAffiliations(fdr_dict_list):
254260
logmessage = 'Database connection error.'
255261
logger.critical(logmessage)
256262
exit()
257-
sql = 'select DUID, NETID, SALUTATION, SURNAME, FIRSTNAME, MIDDLENAME, LEGAL_SURNAME, LEGAL_FIRSTNAME, LEGAL_MIDDLENAME, EMAIL, PRIMARY_VIVO_ORG, PRIMARY_SCHOOL, affiliations from APT.V_PEOPLE_WITH_AFFILIATIONS'
263+
sql = 'select DUID, NETID, SALUTATION, SURNAME, FIRSTNAME, MIDDLENAME, LEGAL_SURNAME, LEGAL_FIRSTNAME, LEGAL_MIDDLENAME, EMAIL, PRIMARY_VIVO_ORG, PRIMARY_SCHOOL, affiliations, PRIMARY_AFFILIATION from APT.V_PEOPLE_WITH_AFFILIATIONS'
258264
data = getResults(ora, sql) # Query FDR. data is a list of tuples, 1 tuple per record.
259265
logmessage = 'Found %s FDR faculty.' % (len(data))
260266
logger.info(logmessage)
261267
ora.close()
262-
print '1'
263268
xml_preabmle = '<?xml version="1.0" encoding="UTF-8" ?>\n<HR_Data>\n' # Begin the XML string to write to people.xml
264269
xml_preabmle += '\t<Feed_ID>FDR</Feed_ID>\n'
265270
xml_preabmle += '\t<people>\n'
266-
print '2'
267-
rpcserver = getServerProxy() # Open connection to Service Directory
271+
if useldapforemail:
272+
rpcserver = getServerProxy() # Open connection to Service Directory
273+
else:
274+
rpcserver = False
268275
sd_dict_list = buildSdDict(sd_file) # Build list of attributes about people from Service Directory dump file.
269-
print '3'
270-
fdr_dict_list, duid_list = buildFdrDict(data, rpcserver, sd_dict_list)
276+
fdr_dict_list, netid_list = buildFdrDict(data, rpcserver, sd_dict_list)
277+
271278
unique_affiliations_list = getUniqueAffiliations(fdr_dict_list) # Build list of unique affiliations/appointments for Elements
272-
duid_list.sort()
273-
sd_dict_list_dedupe = dedupeSdDictList(sd_dict_list, duid_list) # Deduplicate Service Directory people so we don't name people twice
279+
netid_list.sort()
280+
281+
sd_dict_list_dedupe = dedupeSdDictList(sd_dict_list, netid_list) # Deduplicate Service Directory people so we don't name people twice
282+
# TESTED TO HERE
274283
sd_xml = buildXml(sd_dict_list_dedupe) # Build the XML string from SD people
284+
print 'testing buildXML sd dict'
275285
fdr_xml = buildXml(fdr_dict_list) # Build the XML string for FDR people
286+
print 'testing buildXML fdr dict'
276287
xml_postamble = '\t</people>\n</HR_Data>'
277288
xml = xml_preabmle + fdr_xml + sd_xml + xml_postamble # Complete XML string.
278-
print 'here'
279-
print type(xml)
280-
# testing utf-8
281-
with io.open(xmlfile,'w', encoding='utf8') as f:
282-
f.write(xml)
283-
print 'here2'
289+
290+
f = open(xmlfile, 'w') # Serialize the XML string
291+
f.write(xml)
284292
f.close()
285-
print 'here3'
286-
#f = open(xmlfile, 'w') # Serialize the XML string
287-
#f.write(xml)
288-
#f.close()
289293

290294
af = open(affiliationsfile, 'w') # Serialize the unique affiliations
291295
unique_affiliations_list.sort
@@ -295,12 +299,14 @@ def getUniqueAffiliations(fdr_dict_list):
295299
logmessage = "Update complete."
296300
print logmessage
297301
logger.info(logmessage)
298-
except:
302+
303+
except Exception as e:
304+
print (e)
299305
# successful sending of email necessitated disabling McAfee email rule
300306
import smtplib
301307
from email.mime.text import MIMEText
302-
msg = MIMEText('The HR data serialization script has failed on Elements production.')
303-
sender = 'elements@duke.edu'
308+
msg = MIMEText('The HR data serialization script has failed on lib-symeldata.')
309+
sender = 'jjim.tuttle@duke.edu'
304310
recipient = '[email protected]'
305311
msg['Subject'] = 'HR data failed on Elements development'
306312
msg['From'] = '[email protected]'

0 commit comments

Comments
 (0)