@@ -60,7 +60,8 @@ def extract_bindings( data, template ):
60
60
if len (matches ) <= EXAMPLES_PER_TEMPLATE :
61
61
best_matches = matches
62
62
else :
63
- best_matches = sort_matches (matches , template )[0 :EXAMPLES_PER_TEMPLATE ]
63
+ #best_matches = sort_matches(matches, template)[0:EXAMPLES_PER_TEMPLATE]
64
+ best_matches = sorted (matches , template )[0 :EXAMPLES_PER_TEMPLATE ]
64
65
65
66
bindings = list ()
66
67
variables = getattr (template , 'variables' )
@@ -147,25 +148,29 @@ def prioritize_triple_match( usages ):
147
148
return sum (usages )
148
149
149
150
150
- def build_dataset_pair (binding , template , flag = False ):
151
+ def build_dataset_pair (binding , template , flag ):
151
152
print ('\n build_dataset_pair(binding, template):' )
152
153
english = getattr (template , 'question' )
153
154
sparql = getattr (template , 'query' )
154
155
for variable in binding :
155
156
uri = binding [variable ]['uri' ]
156
157
print (" uri = binding[variable]['uri']" )
157
158
print ('\n building dataset, the URI: ' , uri )
159
+ label = do_replacements (uri )
160
+ '''
158
161
if flag: # this is changed to fit the transformer model
159
162
label = binding[variable]['label']
160
163
else:
161
- label = do_replacements (uri )
164
+ label = do_replacements(uri) '''
162
165
placeholder = '<{}>' .format (str .upper (variable ))
163
166
if placeholder in english and label is not None :
164
167
print ('\n building dataset, the label: ' , label )
168
+ english = english .replace (placeholder , label )
169
+ '''
165
170
if flag: # this is changed to fit the transformer model
166
171
english = english.replace(placeholder, label )
167
172
else:
168
- english = english .replace (placeholder , strip_brackets (label ))
173
+ english = english.replace(placeholder, strip_brackets(label)) '''
169
174
print ('\n building dataset, the strip_brackets(label): ' , strip_brackets (label ))
170
175
print ('\n building dataset, the placeholder: ' , placeholder )
171
176
print ('\n building dataset, the english: ' , english )
@@ -177,28 +182,29 @@ def build_dataset_pair(binding, template, flag=False):
177
182
return dataset_pair
178
183
179
184
180
- def generate_dataset (templates , output_dir , file_mode , flag = False ):
185
+ def generate_dataset (templates , output_dir , file_mode , flag ):
181
186
cache = dict ()
182
187
if not os .path .exists (output_dir ):
183
188
os .makedirs (output_dir )
184
189
it = 0
185
- with open (output_dir + '/data_300 .en' , file_mode ) as english_questions , open (output_dir + '/data_300 .sparql' , file_mode ) as sparql_queries :
190
+ with open (output_dir + '/data .en' , file_mode ) as english_questions , open (output_dir + '/data .sparql' , file_mode ) as sparql_queries :
186
191
for template in templates :
187
192
it = it + 1
188
- # print( "for {}th template".format(it)
193
+ print ( "for {}th template" .format (it ) )
189
194
try :
190
195
results = get_results_of_generator_query (cache , template )
196
+ print (it , ' ---> ' , results , '\n ' )
191
197
bindings = extract_bindings (results ["results" ]["bindings" ], template )
192
- # print bindings
198
+ print ( bindings )
193
199
if bindings is None :
194
200
id_or_question = getattr (template , 'id' ) or getattr (template , 'question' )
195
201
logging .debug ("no data for {}" .format (id_or_question ))
196
202
not_instanced_templates .update ([id_or_question ])
197
203
continue
198
204
199
205
for binding in bindings :
200
- dataset_pair = build_dataset_pair (binding , template , flag = flag )
201
- # print "x ", det_pair
206
+ dataset_pair = build_dataset_pair (binding , template , flag )
207
+ print ( " \n dataset_pair---> " , dataset_pair )
202
208
if (dataset_pair ):
203
209
dataset_pair ['english' ] = " " .join (dataset_pair ['english' ].split ())
204
210
english_questions .write ("{}\n " .format (dataset_pair ['english' ]))
@@ -324,16 +330,18 @@ def normalize (ontology_class):
324
330
325
331
# reload(sys) to set default encoding
326
332
327
- reload (sys )
328
- sys .setdefaultencoding ("utf-8" )
333
+ #reload(sys)
334
+ #sys.setdefaultencoding("utf-8")
335
+ import importlib ,sys
336
+ importlib .reload (sys )
329
337
330
338
not_instanced_templates = collections .Counter ()
331
339
used_resources = collections .Counter (json .loads (open (resource_dump_file ).read ())) if use_resources_dump else collections .Counter ()
332
340
file_mode = 'a' if use_resources_dump else 'w'
333
341
templates = read_template_file (template_file )
334
- # print len(templates)
342
+ print ( len (templates ))
335
343
try :
336
- generate_dataset (templates , output_dir , file_mode , flag = flag )
344
+ generate_dataset (templates , output_dir , file_mode , flag )
337
345
# print "lol"
338
346
except :
339
347
#print 'exception occured, look for error in log file'
0 commit comments