Skip to content

Commit 3f39d10

Browse files
committed
update the generators
1 parent 34f9ee9 commit 3f39d10

File tree

2 files changed

+29
-17
lines changed

2 files changed

+29
-17
lines changed

generator.py

+22 -14
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ def extract_bindings( data, template ):
6060
if len(matches) <= EXAMPLES_PER_TEMPLATE:
6161
best_matches = matches
6262
else:
63-
best_matches = sort_matches(matches, template)[0:EXAMPLES_PER_TEMPLATE]
63+
#best_matches = sort_matches(matches, template)[0:EXAMPLES_PER_TEMPLATE]
64+
best_matches = sorted(matches, template)[0:EXAMPLES_PER_TEMPLATE]
6465

6566
bindings = list()
6667
variables = getattr(template, 'variables')
@@ -147,25 +148,29 @@ def prioritize_triple_match( usages ):
147148
return sum(usages)
148149

149150

150-
def build_dataset_pair(binding, template, flag=False):
151+
def build_dataset_pair(binding, template, flag):
151152
print('\n build_dataset_pair(binding, template):')
152153
english = getattr(template, 'question')
153154
sparql = getattr(template, 'query')
154155
for variable in binding:
155156
uri = binding[variable]['uri']
156157
print(" uri = binding[variable]['uri']")
157158
print('\n building dataset, the URI: ', uri)
159+
label = do_replacements(uri)
160+
'''
158161
if flag: # this is changed to fit the transformer model
159162
label = binding[variable]['label']
160163
else:
161-
label = do_replacements(uri)
164+
label = do_replacements(uri) '''
162165
placeholder = '<{}>'.format(str.upper(variable))
163166
if placeholder in english and label is not None:
164167
print('\n building dataset, the label: ', label)
168+
english = english.replace(placeholder, label )
169+
'''
165170
if flag: # this is changed to fit the transformer model
166171
english = english.replace(placeholder, label )
167172
else:
168-
english = english.replace(placeholder, strip_brackets(label))
173+
english = english.replace(placeholder, strip_brackets(label)) '''
169174
print('\n building dataset, the strip_brackets(label): ', strip_brackets(label))
170175
print('\n building dataset, the placeholder: ', placeholder)
171176
print('\n building dataset, the english: ', english)
@@ -177,28 +182,29 @@ def build_dataset_pair(binding, template, flag=False):
177182
return dataset_pair
178183

179184

180-
def generate_dataset(templates, output_dir, file_mode, flag=False):
185+
def generate_dataset(templates, output_dir, file_mode, flag):
181186
cache = dict()
182187
if not os.path.exists(output_dir):
183188
os.makedirs(output_dir)
184189
it = 0
185-
with open(output_dir + '/data_300.en', file_mode) as english_questions, open(output_dir + '/data_300.sparql', file_mode) as sparql_queries:
190+
with open(output_dir + '/data.en', file_mode) as english_questions, open(output_dir + '/data.sparql', file_mode) as sparql_queries:
186191
for template in templates:
187192
it = it + 1
188-
#print( "for {}th template".format(it)
193+
print( "for {}th template".format(it) )
189194
try:
190195
results = get_results_of_generator_query(cache, template)
196+
print(it, ' ---> ', results, '\n' )
191197
bindings = extract_bindings(results["results"]["bindings"], template)
192-
# print bindings
198+
print( bindings)
193199
if bindings is None:
194200
id_or_question = getattr(template, 'id') or getattr(template, 'question')
195201
logging.debug("no data for {}".format(id_or_question))
196202
not_instanced_templates.update([id_or_question])
197203
continue
198204

199205
for binding in bindings:
200-
dataset_pair = build_dataset_pair(binding, template, flag=flag )
201-
# print "x", det_pair
206+
dataset_pair = build_dataset_pair(binding, template, flag )
207+
print( "\n dataset_pair---> ", dataset_pair)
202208
if (dataset_pair):
203209
dataset_pair['english'] = " ".join(dataset_pair['english'].split())
204210
english_questions.write("{}\n".format(dataset_pair['english']))
@@ -324,16 +330,18 @@ def normalize (ontology_class):
324330

325331
# reload(sys) to set default encoding
326332

327-
reload(sys)
328-
sys.setdefaultencoding("utf-8")
333+
#reload(sys)
334+
#sys.setdefaultencoding("utf-8")
335+
import importlib,sys
336+
importlib.reload(sys)
329337

330338
not_instanced_templates = collections.Counter()
331339
used_resources = collections.Counter(json.loads(open(resource_dump_file).read())) if use_resources_dump else collections.Counter()
332340
file_mode = 'a' if use_resources_dump else 'w'
333341
templates = read_template_file(template_file)
334-
#print len(templates)
342+
print(len(templates))
335343
try:
336-
generate_dataset(templates, output_dir, file_mode, flag=flag )
344+
generate_dataset(templates, output_dir, file_mode, flag )
337345
# print "lol"
338346
except:
339347
#print 'exception occured, look for error in log file'

generator_utils.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,14 @@
1111
1212
"""
1313
import collections
14-
import httplib
14+
import http.client as httplib
1515
import json
1616
import logging
1717
import re
1818
import sys
1919
import urllib
20-
import urllib2
20+
import urllib.request as urllib2
21+
from functools import reduce
2122

2223
ENDPOINT = "http://dbpedia.org/sparql"
2324
GRAPH = "http://dbpedia.org"
@@ -55,8 +56,11 @@ def query_dbpedia( query ):
5556
param["timeout"] = "600"
5657
param["debug"] = "on"
5758
try:
58-
resp = urllib2.urlopen(ENDPOINT + "?" + urllib.urlencode(param))
59+
#resp = urllib2.urlopen(ENDPOINT + "?" + urllib.urlencode(param))
60+
resp = urllib2.urlopen(ENDPOINT + "?" + urllib.parse.urlencode(param))
61+
print("\nparam--> ",param)
5962
j = resp.read()
63+
print("\nj-result--> ",j)
6064
resp.close()
6165
except (urllib2.HTTPError, httplib.BadStatusLine):
6266
logging.debug("*** Query error. Empty result set. ***")

0 commit comments

Comments
 (0)