Skip to content

Commit 680c7d9

Browse files
committed
Fixed #1 ZeroDivisionError
1 parent 9a74795 commit 680c7d9

File tree

3 files changed

+13
-41
lines changed

3 files changed

+13
-41
lines changed

similarity/cosine_test.py

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -26,26 +26,12 @@
2626
class TestCosine(unittest.TestCase):
2727

2828
def test_cosine(self):
29-
a = Cosine(1)
30-
s0 = ""
31-
s1 = ""
32-
s2 = "上海"
33-
s3 = "上海市"
34-
distance_format = "distance: {:.4}\t between {} and {}"
35-
similarity_format = "similarity: {:.4}\t between {} and {}"
36-
print(distance_format.format(str(a.distance(s0, s1)), s0, s1))
37-
print(distance_format.format(str(a.distance(s0, s2)), s0, s2))
38-
print(distance_format.format(str(a.distance(s0, s3)), s0, s3))
39-
print(distance_format.format(str(a.distance(s1, s2)), s1, s2))
40-
print(distance_format.format(str(a.distance(s1, s3)), s1, s3))
41-
print(distance_format.format(str(a.distance(s2, s3)), s2, s3))
42-
43-
print(similarity_format.format(str(a.similarity(s0, s1)), s0, s1))
44-
print(similarity_format.format(str(a.similarity(s0, s2)), s0, s2))
45-
print(similarity_format.format(str(a.similarity(s0, s3)), s0, s3))
46-
print(similarity_format.format(str(a.similarity(s1, s2)), s1, s2))
47-
print(similarity_format.format(str(a.similarity(s1, s3)), s1, s3))
48-
print(similarity_format.format(str(a.similarity(s2, s3)), s2, s3))
29+
cos = Cosine(1)
30+
s = ['', ' ', 'Shanghai', 'ShangHai', 'Shang Hai']
31+
for i in range(len(s)):
32+
for j in range(i, len(s)):
33+
print('dis between \'%s\' and \'%s\': %.4f' % (s[i], s[j], cos.distance(s[i], s[j])))
34+
print('sim between \'%s\' and \'%s\': %.4f' % (s[i], s[j], cos.similarity(s[i], s[j])))
4935

5036

5137
if __name__ == "__main__":

similarity/jaccard_test.py

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,26 +26,12 @@
2626
class TestJaccard(unittest.TestCase):
2727

2828
def test_jaccard(self):
29-
a = Jaccard(1)
30-
s0 = ""
31-
s1 = ""
32-
s2 = "上海"
33-
s3 = "上海市"
34-
distance_format = "distance: {:.4}\t between {} and {}"
35-
similarity_format = "similarity: {:.4}\t between {} and {}"
36-
print(distance_format.format(str(a.distance(s0, s1)), s0, s1))
37-
print(distance_format.format(str(a.distance(s0, s2)), s0, s2))
38-
print(distance_format.format(str(a.distance(s0, s3)), s0, s3))
39-
print(distance_format.format(str(a.distance(s1, s2)), s1, s2))
40-
print(distance_format.format(str(a.distance(s1, s3)), s1, s3))
41-
print(distance_format.format(str(a.distance(s2, s3)), s2, s3))
42-
43-
print(similarity_format.format(str(a.similarity(s0, s1)), s0, s1))
44-
print(similarity_format.format(str(a.similarity(s0, s2)), s0, s2))
45-
print(similarity_format.format(str(a.similarity(s0, s3)), s0, s3))
46-
print(similarity_format.format(str(a.similarity(s1, s2)), s1, s2))
47-
print(similarity_format.format(str(a.similarity(s1, s3)), s1, s3))
48-
print(similarity_format.format(str(a.similarity(s2, s3)), s2, s3))
29+
jaccard = Jaccard(1)
30+
s = ['', ' ', 'Shanghai', 'ShangHai', 'Shang Hai']
31+
for i in range(len(s)):
32+
for j in range(i, len(s)):
33+
print('dis between \'%s\' and \'%s\': %.4f' % (s[i], s[j], jaccard.distance(s[i], s[j])))
34+
print('sim between \'%s\' and \'%s\': %.4f' % (s[i], s[j], jaccard.similarity(s[i], s[j])))
4935

5036

5137
if __name__ == "__main__":

similarity/shingle_based.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def get_k(self):
3333

3434
def get_profile(self, string):
3535
shingles = dict()
36-
no_space_str = _SPACE_PATTERN.sub("", string)
36+
no_space_str = _SPACE_PATTERN.sub(" ", string)
3737
for i in range(len(no_space_str) - self.k + 1):
3838
shingle = no_space_str[i:i + self.k]
3939
old = shingles.get(shingle)

0 commit comments

Comments
 (0)