|
1 | 1 | import json
|
2 | 2 | import string
|
3 | 3 |
|
class IntentMatcher:
    """Match user input against intent patterns/responses loaded from a JSON file.

    The intents file is expected to look like
    ``{"intents": [{"patterns": [...], "responses": [...], ...}, ...]}``
    (presumably each class also carries a ``"tag"`` — TODO confirm against the
    actual intents file; this code does not require it).  Matching is a simple
    stemmed bag-of-words overlap count.
    """

    def __init__(self, intents_file_path):
        # The path is kept so train() can be called again to reload the file.
        self.intents_file_path = intents_file_path
        self.intents = self.train()

    class NoMatchingIntentError(Exception):
        """Raised when no intent class matches, or ``intent_class`` is None."""
        pass

    def Tokenize(self, input_string):
        """Strip, drop ASCII punctuation, split on whitespace, and stem.

        Note: does NOT lowercase — callers lowercase first where needed.

        Returns:
            list[str]: stemmed words of ``input_string``.
        """
        cleaned = input_string.strip().translate(
            str.maketrans("", "", string.punctuation)
        )
        return self.stem_list(cleaned.split())

    def Tokenize_List(self, input_list):
        """Tokenize each string in ``input_list``.

        Returns:
            list[list[str]]: one token list per input string.
        """
        return [self.Tokenize(item) for item in input_list]

    def train(self):
        """Load and return the intents JSON from ``self.intents_file_path``."""
        with open(self.intents_file_path, 'r') as json_data:
            return json.load(json_data)

    def patterncompare(self, input_string):
        """Return the intent class whose patterns share the most stemmed words
        with ``input_string``.

        Raises:
            NoMatchingIntentError: if no pattern shares any word with the input.
        """
        # Tokenize() already stems, so the input is stemmed exactly once here.
        # (The previous version re-stemmed the token lists on every pattern
        # iteration, which corrupted words with stackable suffixes, e.g.
        # "beautifully" -> "beautiful" -> "beauti".)
        input_words = self.Tokenize(input_string.lower())

        highest_similarity = 0
        similarity_percentage = 0.0
        most_similar_pattern = None

        for intent_class in self.intents['intents']:
            # Overlap accumulates across ALL patterns of this class.
            similarity = 0
            pattern_word_count = 0

            for pattern in intent_class.get('patterns', []):
                pattern_words = self.Tokenize(pattern.lower())
                pattern_word_count += len(pattern_words)
                for word in input_words:
                    if word in pattern_words:
                        similarity += 1

            if similarity > highest_similarity:
                # Percentage of matched words out of all words considered.
                # (Previously this divided by len(list-of-lists), i.e. the
                # number of patterns rather than the number of words, and was
                # printed with '%' without scaling by 100.)
                total = pattern_word_count + len(input_words)
                similarity_percentage = similarity / total * 100 if total else 0.0
                highest_similarity = similarity
                most_similar_pattern = intent_class

        print(f"Similarity: {similarity_percentage:.2f}%")

        if most_similar_pattern is None:
            raise self.NoMatchingIntentError("No matching intent class found.")
        return most_similar_pattern

    def responsecompare(self, input_string, intent_class):
        """Return the response from ``intent_class`` that best overlaps the input.

        Args:
            input_string: user input to score against each response.
            intent_class: one entry of ``self.intents['intents']``, or None.

        Returns:
            The best-matching original response string, or None when no
            response shares any stemmed word with the input.

        Raises:
            NoMatchingIntentError: if ``intent_class`` is None.
        """
        if intent_class is None:
            raise self.NoMatchingIntentError("No matching intent class found.")
        responses = intent_class.get('responses', [])

        input_words = self.Tokenize(input_string.lower())

        highest_similarity = 0.0
        similarity_percentage = 0.0
        most_similar_response = None

        for response in responses:
            # Bug fix: tokenize the LOWERED text.  (The previous version
            # lowered into a variable but then tokenized the original
            # ``response``, so mixed-case responses never matched.)
            response_words = self.Tokenize(response.lower())

            # Each shared word contributes 1/total, so ``similarity`` is the
            # matched fraction of all words considered (guarding total == 0).
            total = len(response_words) + len(input_words)
            similarity = 0.0
            if total:
                for word in input_words:
                    if word in response_words:
                        similarity += 1 / total

            if similarity > highest_similarity:
                similarity_percentage = similarity * 100
                highest_similarity = similarity
                most_similar_response = response  # original, un-stemmed string

        print(f"Similarity: {similarity_percentage:.2f}%")

        # No "convert back" pass is needed: ``most_similar_response`` already
        # holds the original response string.  (The removed pass compared a
        # whole response string against single stemmed words and could never
        # match correctly.)
        return most_similar_response

    def stem(self, input_word):
        """Crudely stem ``input_word`` by chopping the first matching suffix.

        Only one suffix is removed per call, in list order.
        """
        suffixes = ['ing', 'ly', 'ed', 'es', "'s", 'er', 'est', 'y', 'ily',
                    'able', 'ful', 'ness', 'less', 'ment', 'ive', 'ize', 'ous']
        for suffix in suffixes:
            if input_word.endswith(suffix):
                return input_word[:-len(suffix)]
        return input_word

    def stem_sentence(self, input_string):
        """Split ``input_string`` on whitespace and stem each word."""
        return [self.stem(word) for word in input_string.split()]

    def stem_list(self, input_list):
        """Stem every word in ``input_list`` and return the new list."""
        return [self.stem(word) for word in input_list]
0 commit comments