-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsyllables.py
executable file
·95 lines (67 loc) · 3 KB
/
syllables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
import fileinput
import nltk
import argparse
from nltk.corpus import cmudict
# https://www.nltk.org/_modules/nltk/corpus/reader/cmudict.html
# The CMUdict vowel phonemes are AA, AE, AH, AO, AW, AY, EH, ER, EY, IH, IY,
# OW, OY, UH and UW. Every one of them starts with one of the letters below,
# so checking a phoneme's first letter is enough to recognise a vowel.
vowels = list('AEIOU')
def dump_dictionary():
    """Print each CMUdict entry as ``<word> <syllable count>``, one per line.

    Every (word, pronunciation) pair yielded by ``cmudict.entries()`` is
    printed separately, and the count comes from ``count_from_pronunciation``.
    """
    for word, phonemes in cmudict.entries():
        print(f'{word} {count_from_pronunciation(phonemes)}')
def count_from_pronunciation(pronunciation):
    """Return the number of syllables in a single pronunciation.

    ``pronunciation`` is an iterable of phoneme strings. A phoneme counts as
    one syllable when its first character is in ``vowels``. Vowel phonemes
    may carry a trailing stress digit (0, 1 or 2); it is ignored because only
    the first character is inspected.
    """
    syllables = 0
    for phoneme in pronunciation:
        if phoneme[:1] in vowels:
            syllables += 1
    return syllables
# Lazily-built CMUdict mapping shared by every lookup (see below).
_PRONUNCIATIONS = None


def _pronunciation_table():
    """Return the CMUdict word -> pronunciations mapping, building it once.

    ``cmudict.dict()`` constructs a fresh dictionary from the corpus entries
    on each call, so the result is kept here and reused instead of being
    rebuilt for every word that is looked up.
    """
    global _PRONUNCIATIONS
    if _PRONUNCIATIONS is None:
        _PRONUNCIATIONS = cmudict.dict()
    return _PRONUNCIATIONS


def count_from_word(word):
    """Return the syllable count of each known pronunciation of ``word``.

    The lookup is case-insensitive (``word`` is lower-cased first).

    Returns:
        A list with one integer per pronunciation found for the word, or an
        empty list when the word is not in the dictionary.
    """
    pronunciations = _pronunciation_table().get(word.lower())
    if pronunciations is None:
        return []
    return [count_from_pronunciation(pronunciation) for pronunciation in pronunciations]
def count_from_word_to_string(word):
    """Format the syllable counts of ``word`` as a comma-separated string.

    Returns ``'0'`` when ``count_from_word`` yields no counts for the word.
    """
    counts = count_from_word(word)
    if not counts:
        return '0'
    return ','.join(map(str, counts))
def filter_by_syllable(words, n):
    """Return the words having a pronunciation with exactly ``n`` syllables.

    Input order is preserved. Words for which ``count_from_word`` returns no
    counts are dropped.
    """
    kept = []
    for candidate in words:
        if n in count_from_word(candidate):
            kept.append(candidate)
    return kept
def filter_by_syllables(words, nn):
    """Return the words whose syllable count matches any value in ``nn``.

    A word is kept when at least one of its pronunciations has a syllable
    count contained in ``nn``. Input order is preserved.

    Args:
        words: iterable of words to filter.
        nn: iterable of accepted syllable counts.
    """
    # Build the lookup set once instead of once per word; this also keeps a
    # one-shot iterable passed as ``nn`` from being exhausted after the first
    # word.
    wanted = set(nn)
    return [word for word in words if not wanted.isdisjoint(count_from_word(word))]
if __name__ == '__main__':
    # Command-line entry point. Exactly one mode runs, chosen in this order:
    #   -d  dump the whole dictionary with syllable counts
    #   -n  keep only the input words whose syllable count is in the list
    #   -c  echo each input word followed by its syllable count(s)
    # With none of these options the script produces no output.
    oparser = argparse.ArgumentParser(description="syllable filter",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    oparser.add_argument("-n", dest="syllable_list",
                         type=str,
                         help="comma-separated list of nbr of syllables")

    oparser.add_argument('-c', dest='count',
                         default=False,
                         action='store_true',
                         help='add syllable count after each word')

    oparser.add_argument('-b', dest='retain_blank',
                         default=False,
                         action='store_true',
                         help='retain blank lines in output')

    oparser.add_argument('-d', dest='dump',
                         default=False,
                         action='store_true',
                         help='dump the dictionary')

    oparser.add_argument('files', metavar='FILE', nargs='*',
                         help='input files')

    options = oparser.parse_args()

    if options.dump:
        dump_dictionary()
    elif options.syllable_list:
        try:
            keep_counts = [int(s) for s in options.syllable_list.split(',')]
        except ValueError:
            # Report a malformed list as a usage error rather than letting
            # the ValueError surface as a traceback.
            oparser.error(
                f'-n expects a comma-separated list of integers, got {options.syllable_list!r}')
        # fileinput reads the named files, or stdin when none are given.
        for line in fileinput.input(options.files):
            tokens = line.split()
            result = ' '.join(filter_by_syllables(tokens, keep_counts))
            # Lines left empty by the filter are skipped unless -b was given.
            if result or options.retain_blank:
                print(result)
    elif options.count:
        for line in fileinput.input(options.files):
            tokens = line.split()
            result = ' '.join([' '.join([token, count_from_word_to_string(token)]) for token in tokens])
            if result or options.retain_blank:
                print(result)