Skip to content

Commit c634045

Browse files
authored
Merge pull request #3 from pmalgorzata/zero-division-solution
added regex prior to profiles calculation
2 parents b688fd7 + af7ea43 commit c634045

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

similarity/cosine.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@
2424
from .string_distance import NormalizedStringDistance
2525
from .string_similarity import NormalizedStringSimilarity
2626

27+
import re
28+
_SPACE_PATTERN = re.compile("\\s+")
29+
2730

2831
class Cosine(ShingleBased, NormalizedStringDistance,
2932
NormalizedStringSimilarity):
@@ -41,6 +44,10 @@ def similarity(self, s0, s1):
4144
raise TypeError("Argument s1 is NoneType.")
4245
if s0 == s1:
4346
return 1.0
47+
48+
s0 = _SPACE_PATTERN.sub("", s0)
49+
s1 = _SPACE_PATTERN.sub("", s1)
50+
4451
if len(s0) < self.get_k() or len(s1) < self.get_k():
4552
return 0.0
4653
profile0 = self.get_profile(s0)

similarity/jaccard.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222
from .string_distance import NormalizedStringDistance, MetricStringDistance
2323
from .string_similarity import NormalizedStringSimilarity
2424

25+
import re
26+
_SPACE_PATTERN = re.compile("\\s+")
2527

2628
class Jaccard(ShingleBased, MetricStringDistance, NormalizedStringDistance, NormalizedStringSimilarity):
2729

@@ -38,6 +40,10 @@ def similarity(self, s0, s1):
3840
raise TypeError("Argument s1 is NoneType.")
3941
if s0 == s1:
4042
return 1.0
43+
44+
s0 = _SPACE_PATTERN.sub("", s0)
45+
s1 = _SPACE_PATTERN.sub("", s1)
46+
4147
if len(s0) < self.get_k() or len(s1) < self.get_k():
4248
return 0.0
4349
profile0 = self.get_profile(s0)

0 commit comments

Comments (0)