Skip to content

Commit de6dc4c

Browse files
committed
Rename package
1 parent 680c7d9 commit de6dc4c

33 files changed

+20
-21
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
.idea/
22
.vscode/
33
__pycache__/
4-
similarity/__pycache__/
54
venv/
65
build/
76
dist/

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ The Levenshtein distance between two words is the minimum number of single-chara
103103
It is a metric string distance. This implementation uses dynamic programming (Wagner–Fischer algorithm), with only 2 rows of data. The space requirement is thus O(m) and the algorithm runs in O(m.n).
104104

105105
```python
106-
from similarity.levenshtein import Levenshtein
106+
from strsim.levenshtein import Levenshtein
107107

108108
levenshtein = Levenshtein()
109109
print(levenshtein.distance('My string', 'My $string'))
@@ -119,7 +119,7 @@ This distance is computed as levenshtein distance divided by the length of the l
119119
The similarity is computed as 1 - normalized distance.
120120

121121
```python
122-
from similarity.normalized_levenshtein import NormalizedLevenshtein
122+
from strsim.normalized_levenshtein import NormalizedLevenshtein
123123

124124
normalized_levenshtein = NormalizedLevenshtein()
125125
print(normalized_levenshtein.distance('My string', 'My $string'))
@@ -140,8 +140,8 @@ This algorithm is usually used for optical character recognition (OCR) applicati
140140
It can also be used for keyboard typing auto-correction. Here the cost of substituting E and R is lower for example because these are located next to each other on an AZERTY or QWERTY keyboard. Hence the probability that the user mistyped the characters is higher.
141141

142142
```python
143-
from similarity.weighted_levenshtein import WeightedLevenshtein
144-
from similarity.weighted_levenshtein import CharacterSubstitutionInterface
143+
from strsim.weighted_levenshtein import WeightedLevenshtein
144+
from strsim.weighted_levenshtein import CharacterSubstitutionInterface
145145

146146
class CharacterSubstitution(CharacterSubstitutionInterface):
147147
def cost(self, c0, c1):
@@ -162,7 +162,7 @@ It does respect triangle inequality, and is thus a metric distance.
162162
This is not to be confused with the optimal string alignment distance, which is an extension where no substring can be edited more than once.
163163

164164
```python
165-
from similarity.damerau import Damerau
165+
from strsim.damerau import Damerau
166166

167167
damerau = Damerau()
168168
print(damerau.distance('ABCDEF', 'ABDCEF'))
@@ -192,7 +192,7 @@ The difference from the algorithm for Levenshtein distance is the addition of on
192192
Note that for the optimal string alignment distance, the triangle inequality does not hold and so it is not a true metric.
193193

194194
```python
195-
from similarity.optimal_string_alignment import OptimalStringAlignment
195+
from strsim.optimal_string_alignment import OptimalStringAlignment
196196

197197
optimal_string_alignment = OptimalStringAlignment()
198198
print(optimal_string_alignment.distance('CA', 'ABC'))
@@ -214,7 +214,7 @@ It is (roughly) a variation of Damerau-Levenshtein, where the substitution of 2
214214
The distance is computed as 1 - Jaro-Winkler similarity.
215215

216216
```python
217-
from similarity.jarowinkler import JaroWinkler
217+
from strsim.jarowinkler import JaroWinkler
218218

219219
jarowinkler = JaroWinkler()
220220
print(jarowinkler.similarity('My string', 'My tsring'))
@@ -246,7 +246,7 @@ This class implements the dynamic programming approach, which has a space requir
246246
In "Length of Maximal Common Subsequences", K.S. Larsen proposed an algorithm that computes the length of LCS in time O(log(m).log(n)). But the algorithm has a memory requirement O(m.n²) and was thus not implemented here.
247247

248248
```python
249-
from similarity.longest_common_subsequence import LongestCommonSubsequence
249+
from strsim.longest_common_subsequence import LongestCommonSubsequence
250250

251251
lcs = LongestCommonSubsequence()
252252
# Will produce 4.0
@@ -263,7 +263,7 @@ http://heim.ifi.uio.no/~danielry/StringMetric.pdf
263263
The distance is computed as 1 - |LCS(s1, s2)| / max(|s1|, |s2|)
264264

265265
```python
266-
from similarity.metric_lcs import MetricLCS
266+
from strsim.metric_lcs import MetricLCS
267267

268268
metric_lcs = MetricLCS()
269269
s1 = 'ABCDEFG'
@@ -300,7 +300,7 @@ The algorithm uses affixing with special character '\n' to increase the weight o
300300
In the paper, Kondrak also defines a similarity measure, which is not implemented (yet).
301301

302302
```python
303-
from similarity.ngram import NGram
303+
from strsim.ngram import NGram
304304

305305
twogram = NGram(2)
306306
print(twogram.distance('ABCD', 'ABTUIO'))
@@ -320,7 +320,7 @@ The cost for computing these similarities and distances is mainly dominated by
320320
Directly compute the distance between strings:
321321

322322
```python
323-
from similarity.qgram import QGram
323+
from strsim.qgram import QGram
324324

325325
qgram = QGram(2)
326326
print(qgram.distance('ABCD', 'ABCE'))
@@ -330,7 +330,7 @@ print(qgram.distance('ABCD', 'ABCE'))
330330
Or, for large datasets, pre-compute the profile of all strings. The similarity can then be computed between profiles:
331331

332332
```python
333-
from similarity.cosine import Cosine
333+
from strsim.cosine import Cosine
334334

335335
cosine = Cosine(2)
336336
s0 = 'My first string'
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

similarity/jarowinkler_test.py renamed to strsim/jarowinkler_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_jarowinkler(self):
3232
s2 = "上海"
3333
s3 = "上海市"
3434
distance_format = "distance: {:.4}\t between {} and {}"
35-
similarity_format = "similarity: {:.4}\t between {} and {}"
35+
similarity_format = "strsim: {:.4}\t between {} and {}"
3636
print(distance_format.format(str(a.distance(s0, s1)), s0, s1))
3737
print(distance_format.format(str(a.distance(s0, s2)), s0, s2))
3838
print(distance_format.format(str(a.distance(s0, s3)), s0, s3))
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

similarity/normalized_levenshtein_test.py renamed to strsim/normalized_levenshtein_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_normalized_levenshtein(self):
3232
s2 = "上海"
3333
s3 = "上海市"
3434
distance_format = "distance: {:.4}\t between {} and {}"
35-
similarity_format = "similarity: {:.4}\t between {} and {}"
35+
similarity_format = "strsim: {:.4}\t between {} and {}"
3636
print(distance_format.format(str(a.distance(s0, s1)), s0, s1))
3737
print(distance_format.format(str(a.distance(s0, s2)), s0, s2))
3838
print(distance_format.format(str(a.distance(s0, s3)), s0, s3))
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

similarity/sorensen_dice_test.py renamed to strsim/sorensen_dice_test.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,19 @@ def test_sorensen_dice(self):
3232
s2 = "上海"
3333
s3 = "上海市"
3434
distance_format = "distance: {:.4}\t between {} and {}"
35-
similarity_format = "similarity: {:.4}\t between {} and {}"
35+
similarity_format = "strsim: {:.4}\t between {} and {}"
3636
# print(distance_format.format(str(a.distance(s0, s1)), s0, s1))
3737
# print(distance_format.format(str(a.distance(s0, s2)), s0, s2))
3838
# print(distance_format.format(str(a.distance(s0, s3)), s0, s3))
3939
# print(distance_format.format(str(a.distance(s1, s2)), s1, s2))
4040
# print(distance_format.format(str(a.distance(s1, s3)), s1, s3))
4141
print(distance_format.format(str(a.distance(s2, s3)), s2, s3))
4242

43-
# print(similarity_format.format(str(a.similarity(s0, s1)), s0, s1))
44-
# print(similarity_format.format(str(a.similarity(s0, s2)), s0, s2))
45-
# print(similarity_format.format(str(a.similarity(s0, s3)), s0, s3))
46-
# print(similarity_format.format(str(a.similarity(s1, s2)), s1, s2))
47-
# print(similarity_format.format(str(a.similarity(s1, s3)), s1, s3))
43+
# print(similarity_format.format(str(a.strsim(s0, s1)), s0, s1))
44+
# print(similarity_format.format(str(a.strsim(s0, s2)), s0, s2))
45+
# print(similarity_format.format(str(a.strsim(s0, s3)), s0, s3))
46+
# print(similarity_format.format(str(a.strsim(s1, s2)), s1, s2))
47+
# print(similarity_format.format(str(a.strsim(s1, s3)), s1, s3))
4848
print(similarity_format.format(str(a.similarity(s2, s3)), s2, s3))
4949

5050

File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)