-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathstring_parse.py
More file actions
46 lines (37 loc) · 1.09 KB
/
string_parse.py
File metadata and controls
46 lines (37 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import nltk
nltk.download('punkt')
sentence1 = "your report may, if you wish, include some images or video material."
sentence2 = "apple bottom jeans have cheese."
sentence_list = [sentence1, sentence2]
tokens = []
for sentence in sentence_list:
tokens.append(nltk.word_tokenize(sentence))
print(tokens)
output2 = "[[0 15 5 (Xp) [2 4 3 (Xx) [2 4 3 (Xx)"
output = output2.replace('[', '').split()
index = 0
start = 0
end = 0
global_features = set()
description_features = set()
for token in tokens:
for i in output:
if index % 4 == 0:
start = int(i)
elif index % 4 == 1:
end = int(i)
print(end)
elif index % 4 == 2:
pass
elif index % 4 == 3:
link = i.replace('(', '').replace(')', '')
if start == 0:
word1 = "left_wall"
word2 = token[end - 1]
else:
word1 = token[start - 1]
word2 = token[end - 1]
list_of_features.add((word1, word2, link))
print(list_of_features)
index += 1
print(output)