-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathexample.py
More file actions
executable file
·22 lines (19 loc) · 886 Bytes
/
example.py
File metadata and controls
executable file
·22 lines (19 loc) · 886 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import metapy
def tokens_lowercase(doc):
    """Return the stemmed, lowercased word trigrams found in *doc*.

    The document text is pushed through a metapy filter chain:

    1. ``ICUTokenizer`` with ``suppress_tags=True``,
    2. ``LowercaseFilter``,
    3. ``LengthFilter`` keeping only tokens of 2 to 5 characters,
    4. ``Porter2Filter`` for stemming,

    and the resulting stream feeds an ``NGramWordAnalyzer`` with n = 3.

    Args:
        doc: A ``metapy.index.Document`` whose content has already been set.

    Returns:
        A list holding every trigram key produced by the analyzer, in the
        iteration order of the analyzer's result mapping.
    """
    # Each filter wraps the previous stream, so the order here is the order
    # in which the text is processed.
    tok = metapy.analyzers.ICUTokenizer(suppress_tags=True)
    tok = metapy.analyzers.LowercaseFilter(tok)
    # Drop words shorter than 2 or longer than 5 characters.
    tok = metapy.analyzers.LengthFilter(tok, min=2, max=5)
    tok = metapy.analyzers.Porter2Filter(tok)

    # Word-level n-grams with n = 3; the result maps each trigram to its count.
    ana = metapy.analyzers.NGramWordAnalyzer(3, tok)
    trigrams = ana.analyze(doc)

    # The remainder is kept exactly as the exercise template provides it.
    tok.set_content(doc.content())
    tokens, counts = [], []
    for token, count in trigrams.items():
        counts.append(count)
        tokens.append(token)
    return tokens
if __name__ == '__main__':
    # Small demonstration: wrap one sentence in a metapy document, echo it,
    # then print the tokens extracted from it.
    sample_text = "I said that I can't believe that it only costs $19.95! I could only find it for more than $30 before."
    doc = metapy.index.Document()
    doc.content(sample_text)
    # Calling .content() with no argument reads the document string back.
    print(doc.content())
    print(tokens_lowercase(doc))