Skip to content

Commit 0c5c65b

Browse files
committed
Use commons-text to determine the Levenshtein distance and implement a simple solution.
1 parent 343cc07 commit 0c5c65b

File tree

4 files changed

+35
-25
lines changed

4 files changed

+35
-25
lines changed

deps.edn

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
{:paths ["src" "resources"]
2-
:deps {org.clojure/clojure {:mvn/version "1.10.0"}}
3-
:aliases {:test {:extra-paths ["test" "dev"]
4-
:extra-deps {org.clojure/test.check {:mvn/version "0.9.0"}}
5-
:main-opts ["-m" "cognitect.test-runner"]}}
6-
:mvn/repos {"central" {:url "https://repo1.maven.org/maven2/"}
7-
"clojars" {:url "https://clojars.org/repo"}}}
1+
{:paths ["src" "resources"],
2+
:deps
3+
{org.clojure/clojure #:mvn{:version "1.10.0"},
4+
org.apache.commons/commons-text #:mvn{:version "1.6"}},
5+
:aliases
6+
{:test
7+
{:extra-paths ["test" "dev"],
8+
:extra-deps #:org.clojure{test.check #:mvn{:version "0.9.0"}},
9+
:main-opts ["-m" "cognitect.test-runner"]}},
10+
:mvn/repos
11+
{"central" {:url "https://repo1.maven.org/maven2/"},
12+
"clojars" {:url "https://clojars.org/repo"}}}

src/spell-checker/dictionary.clj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
slurp
1414
split-at-line-breaks
1515
(mapcat #(string/split % #"-"))
16+
(filter #(>= (count %) 2))
17+
(map string/lower-case)
1618
set))
1719

1820
;; Dictionary word collection, produced by calling `build-dictionary` on `pt-br`.
;; `defonce` only evaluates the expression when `words` has no root value yet,
;; so re-loading this namespace does not rebuild it.
(defonce words (build-dictionary pt-br))

src/spell-checker/levenshtein.clj

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,7 @@
1-
(ns spell-checker.levenshtein)
1+
(ns spell-checker.levenshtein
2+
(:import (org.apache.commons.text.similarity LevenshteinDistance)))
23

3-
;; Substitution cost for the final characters of the two words:
;; 0 when `(last word-a)` equals `(last word-b)`, 1 otherwise.
(defn- cost [word-a word-b]
4-
(if (= (last word-a) (last word-b))
5-
0
6-
1))
4+
;; Shared `LevenshteinDistance` instance, created once with the no-argument
;; constructor and used by `distance` for every comparison.
(def ^:private levenshtein (LevenshteinDistance.))
75

8-
;; Forward declaration: `distance*` calls `distance`, which is defined after it.
(declare distance)
9-
;; Recursive step for two non-empty words. Takes the minimum of:
;;   - 1 + distance of `word-a` without its last character vs. `word-b`
;;   - 1 + distance of `word-a` vs. `word-b` without its last character
;;   - cost of the last characters + distance of both words without their
;;     last characters
;; No memoization is visible here, so every call fans out into three further
;; calls to `distance`.
(defn- distance* [word-a word-b]
10-
(let [cost (cost word-a word-b)]
11-
(min
12-
(inc (distance (subs word-a 0 (dec (count word-a))) word-b))
13-
(inc (distance word-a (subs word-b 0 (dec (count word-b)))))
14-
(+ cost (distance (subs word-a 0 (dec (count word-a))) (subs word-b 0 (dec (count word-b))))))))
15-
16-
;; Entry point of the recursion. When either word is empty the result is the
;; length of the other word; otherwise the work is delegated to `distance*`.
(defn- distance [word-a word-b]
17-
(cond
18-
(empty? word-a) (count word-b)
19-
(empty? word-b) (count word-a)
20-
:else (distance* word-a word-b)))
6+
(defn distance
  "Returns the Levenshtein distance between `word-a` and `word-b`, computed by
  the shared commons-text `LevenshteinDistance` instance held in `levenshtein`."
  [word-a word-b]
  ;; The type hint lets the compiler emit a direct method call. Without it the
  ;; `.apply` lookup is done reflectively on every invocation, which is costly
  ;; when this function is called once per dictionary entry.
  (.apply ^LevenshteinDistance levenshtein word-a word-b))

src/spell-checker/solution.clj

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
(ns spell-checker.solution
2+
(:require [spell-checker.dictionary :as dictionary]
3+
[spell-checker.levenshtein :as levenshtein]))
4+
5+
(defn check-spelling
  "Checks `word` against `dict`.

  `dict` is invoked as a function on `word` (e.g. a set of known words) and is
  also traversed as a collection when looking for a suggestion.

  Returns `{:result :correct-word}` when `(dict word)` is truthy. Otherwise
  returns `{:result :misspelled-word, :suggestion s}`, where `s` is an entry of
  `dict` with the smallest `levenshtein/distance` to `word`, or nil when `dict`
  has no entries."
  [dict word]
  (if (dict word)
    {:result :correct-word}
    {:result :misspelled-word
     ;; `min-key` needs at least one candidate; applying it to an empty
     ;; collection throws an ArityException, so guard with `seq` first.
     :suggestion (when-let [candidates (seq dict)]
                   (apply min-key (partial levenshtein/distance word) candidates))}))
10+
11+
(comment
12+
"Evaluate the forms bellow to see the results"
13+
(check-spelling dictionary/words "cavalo")
14+
(check-spelling dictionary/words "viajem")
15+
(check-spelling dictionary/words "viajen")
16+
(check-spelling dictionary/words "vakaa"))

0 commit comments

Comments
 (0)