-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwrapper.py
59 lines (50 loc) · 2.15 KB
/
wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import requests
import re
import json
class GrapheneExtract(object):
    """Flatten an extraction response into bracketed text or plain triples.

    The constructor expects a mapping with a ``'sentences'`` list whose items
    each carry an ``'extractionMap'`` dict (id -> extraction). Each extraction
    is read through the keys ``arg1``, ``relation``, ``arg2``,
    ``simpleContexts`` and ``linkedContexts``.

    Attributes:
        json: merged ``extractionMap`` of every sentence (empty on failure).
        visited: id -> bool, whether ``visit`` already emitted that extraction.
        failed: True when ``blob`` did not have the expected shape.
        strbuild: text accumulated by ``visit`` / normalised by ``linearize``.
    """

    def __init__(self, blob):
        """Merge the per-sentence extraction maps found in ``blob``.

        A blob that lacks the expected structure does not raise; it sets
        ``failed`` and leaves ``json`` / ``visited`` empty so the other
        methods stay safe to call.
        """
        # Defaults first, so every attribute exists even on the failure path.
        self.json = {}
        self.visited = {}
        self.strbuild = ""
        self.failed = False
        try:
            # Later sentences win when two maps share an id.
            merged = {
                key: value
                for sentence in blob['sentences']
                for key, value in sentence['extractionMap'].items()
            }
        except (KeyError, TypeError, AttributeError,
                json.decoder.JSONDecodeError):
            # Plain indexing raises KeyError/TypeError/AttributeError on a
            # malformed blob; JSONDecodeError is kept for compatibility.
            self.failed = True
            return
        self.json = merged
        self.visited = dict.fromkeys(merged, False)  # (hash_id, bool is_used)

    def linearize(self):
        """Depth-first walk over the extractions, returned as one string.

        Every extraction is emitted once as ``( arg1 <> relation <> arg2 ... )``
        with its contexts nested inside. All ``.`` characters are removed and
        whitespace runs are collapsed to single spaces. Returns ``""`` when
        construction failed.
        """
        # TODO: reorder simple and linked contexts based on position in the original sentence
        if self.failed:
            return ""
        # Reset traversal state so repeated calls give the same output.
        self.visited = dict.fromkeys(self.json, False)
        self.strbuild = ""
        for hashId in self.json:
            self.visit(hashId)
        self.strbuild = self.strbuild.replace(".", "")
        self.strbuild = re.sub(r"\s+", ' ', self.strbuild)
        return self.strbuild.strip()

    def visit(self, hashId):
        """Append extraction ``hashId`` and its unvisited children to ``strbuild``.

        Does nothing if the extraction was already visited. The input
        extraction dicts are only read, never modified.
        """
        if self.visited[hashId]:
            return
        self.visited[hashId] = True
        extraction = self.json[hashId]
        self.strbuild += (" ( " + extraction['arg1'] + " <> "
                          + extraction['relation'] + " <> " + extraction['arg2'])
        for simple in extraction['simpleContexts']:
            label = simple['classification']
            if label == "NOUN_BASED":
                # These tend to be duplicated in complex extractions.
                continue
            text = simple['text']
            # Drop the leading temporal word from a local copy of the text;
            # the classification label is emitted in front of it anyway.
            if label == "TEMPORAL_BEFORE" and text.lower().startswith("after"):
                text = text[len("after"):]
            if label == "TEMPORAL_AFTER" and text.lower().startswith("before"):
                text = text[len("before"):]
            self.strbuild += " " + label + " " + text
        for child in extraction['linkedContexts']:
            target = child['targetID']
            if not self.visited[target]:
                self.strbuild += " " + child['classification'] + " "
                self.visit(target)
        self.strbuild += " ) "

    def extractList(self):
        """Return every complete extraction as ``"arg1 <> relation <> arg2"``.

        Used for simple 3-tuple OpenIE output. Extractions with an empty
        ``arg1``, ``relation`` or ``arg2`` are skipped. Returns ``[]`` when
        construction failed.
        """
        if self.failed:
            return []
        toReturn = []
        for extraction in self.json.values():
            triple = (extraction['arg1'], extraction['relation'],
                      extraction['arg2'])
            if "" in triple:
                continue
            toReturn.append(" <> ".join(triple))
        return toReturn