Skip to content

Commit 9b7d0f2

Browse files
author
Tomasz bla Fortuna
committed
Change must token API and prepare a 1.0 release.
- Breaking API change, bumping major from 0 to 1. - Handle clippy suggestions. - There's only one error, but make it explicit. - .cloned() - get_index helper to make use of ? and unindent code.
1 parent c4071cc commit 9b7d0f2

File tree

11 files changed

+102
-86
lines changed

11 files changed

+102
-86
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "fuzzdex"
3-
version = "0.7.0"
3+
version = "1.0.0"
44
authors = ["Tomasz bla Fortuna <[email protected]>"]
55
edition = "2021"
66
license = "MIT"

Pipfile

-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,3 @@ ipython = "*"
1111
[dev-packages]
1212
pytest = "==7.1.2"
1313
ipython = "*"
14-
15-
[requires]
16-
python_version = "3"

README.md

+7-5
Original file line numberDiff line numberDiff line change
@@ -67,23 +67,25 @@ cities.finish()
6767
streets.finish()
6868

6969
# warszawa matches warsaw at editing distance 2.
70-
cities.search("warszawa", [], max_distance=2, limit=60)
70+
cities.search(["warszawa"], [], max_distance=2, limit=60)
7171
# [{'origin': 'Warsaw', 'index': 1, 'token': 'warsaw',
7272
# 'distance': 2, 'score': 200000.0, 'should_score': 0.0}]
73-
73+
#
74+
# NOTE: Currently only a single `must` token is supported.
75+
#
7476
# `świat` adds additional should score to the result and places it higher
7577
# in case the limit is set:
76-
streets.search("nowy", ["świat"], max_distance=2, constraint=1)
78+
streets.search(["nowy"], ["świat"], max_distance=2, constraint=1)
7779
# [{'origin': 'Nowy Świat', 'index': 2, 'token': 'nowy',
7880
# 'distance': 0, 'score': 5.999, 'should_score': 7.4999}]
7981

8082
# Won't match with constraint 2.
81-
streets.search("nowy", ["świat"], constraint=2)
83+
streets.search(["nowy"], ["świat"], constraint=2)
8284
# []
8385

8486
# Querying for `czerniawska` will return `czerniakowska` (no constraints),
8587
# but with a lower score and higher distance:
86-
In [22]: streets.search("czerniawska", [], max_distance=2)
88+
In [22]: streets.search(["czerniawska"], [], max_distance=2)
8789
Out[22]:
8890
# [{'origin': 'Czerniawska', 'index': 4, 'token': 'czerniawska',
8991
# 'distance': 0, 'score': 9.49995231628418, 'should_score': 0.0},

src/fuzzdex.rs

+11
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,17 @@ pub mod seeker;
1111
#[cfg(test)]
1212
mod tests;
1313

14+
/// Single possible error type.
15+
#[derive(Debug)]
16+
pub struct DuplicateId;
17+
18+
impl std::fmt::Display for DuplicateId {
19+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20+
write!(f, "Duplicated Phrase ID")
21+
}
22+
}
23+
impl std::error::Error for DuplicateId {}
24+
1425
/// Token owning a trigram is uniquely identified by phrase index + token index.
1526
#[derive(Debug)]
1627
struct Position {

src/fuzzdex/indexer.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
use std::collections::HashMap;
2-
use std::collections::HashSet;
1+
use std::collections::{HashMap, HashSet};
32

43
use crate::utils;
54
use super::*;
@@ -45,9 +44,9 @@ impl Indexer {
4544
/// fuzzy-matched tokens. Phrase index must be unique within the index (or
4645
/// Err is returned) and can reference some external dictionary.
4746
pub fn add_phrase(&mut self, phrase: &str, phrase_idx: usize,
48-
constraints: Option<&HashSet<usize, FastHash>>) -> Result<(), ()> {
47+
constraints: Option<&HashSet<usize, FastHash>>) -> Result<(), DuplicateId> {
4948
if self.phrases.contains_key(&phrase_idx) {
50-
Err(())
49+
Err(DuplicateId {})
5150
} else {
5251
let entry = PhraseEntry::new(phrase_idx, phrase, constraints);
5352
for (token_idx, token) in entry.tokens.iter().enumerate() {

src/fuzzdex/query.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,16 @@ impl Query {
2323
/// pass a single token, but if the internal tokenizer splits must into many
2424
/// tokens, the longest will be set as a `must` and others moved to
2525
/// `should`.
26-
pub fn new(must: &str, should: &[&str]) -> Self {
26+
///
27+
/// Currently only a single `must` token is allowed, but the API accepts a list
28+
/// for future compatibility.
29+
pub fn new(must: &[&str], should: &[&str]) -> Self {
30+
2731
let mut should_tokens: Vec<String> = should.iter().map(|s| s.to_string()).collect();
2832

33+
assert!(must.len() == 1, "Only one `must token` is currently supported");
34+
let must = must[0];
35+
2936
/* Sometimes the `must` token passed in a query is not tokenized the same way we tokenize it */
3037
let mut tokens: Vec<String> = utils::tokenize(must, 1);
3138
let must_token: String = if tokens.len() > 1 {

src/fuzzdex/seeker.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ impl Index {
112112
/* LRU cache updates position even on get and needs mutable reference */
113113
{
114114
let mut cache = self.cache.lock().unwrap();
115-
let heatmap = cache.heatmaps.get(token).map(|h| h.clone());
115+
let heatmap = cache.heatmaps.get(token).cloned();
116116
if let Some(heatmap) = heatmap {
117117
/* We operate on reference-counted heatmaps to eliminate unnecessary copying */
118118
cache.stats.hits += 1;

src/fuzzdex/tests.rs

+14-14
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ fn it_works() {
1818
assert_eq!(idx.cache_stats().inserts, 0);
1919

2020
/* First query */
21-
let query = Query::new("another", &["testing"]).limit(Some(60));
21+
let query = Query::new(&["another"], &["testing"]).limit(Some(60));
2222
println!("Querying {:?}", query);
2323
let results = idx.search(&query);
2424

@@ -33,7 +33,7 @@ fn it_works() {
3333
assert_eq!(idx.cache_stats().inserts, 1);
3434

3535
/* Test constraint */
36-
let query = Query::new("another", &["testing"])
36+
let query = Query::new(&["another"], &["testing"])
3737
.constraint(Some(1));
3838
println!("Querying {:?}", query);
3939
let results = idx.search(&query);
@@ -47,7 +47,7 @@ fn it_works() {
4747
assert_eq!(idx.cache_stats().inserts, 1);
4848

4949
/* Third query */
50-
let query = Query::new("this", &["entry"]).limit(Some(60));
50+
let query = Query::new(&["this"], &["entry"]).limit(Some(60));
5151
let results = idx.search(&query);
5252

5353
for result in &results {
@@ -59,7 +59,7 @@ fn it_works() {
5959
assert!(results[0].should_score > 0.0, "First result should have non-zero should-score");
6060

6161
/* Test multiple tokens matching in single phrase */
62-
let query = Query::new("test", &[]).limit(Some(60));
62+
let query = Query::new(&["test"], &[]).limit(Some(60));
6363
println!("Querying {:?}", query);
6464
let results = idx.search(&query);
6565

@@ -86,7 +86,7 @@ fn it_works_with_case_change_and_spellerror() {
8686
let idx = idx.finish();
8787

8888
/* Query with lowercase and single spell error */
89-
let query = Query::new("waszawa", &[]).limit(Some(1));
89+
let query = Query::new(&["waszawa"], &[]).limit(Some(1));
9090
println!("Querying {:?}", query);
9191
let results = idx.search(&query);
9292

@@ -112,7 +112,7 @@ fn it_works_with_small_tokens() {
112112
let idx = idx.finish();
113113

114114
/* First query */
115-
let query = Query::new("may", &["1"]).limit(Some(1));
115+
let query = Query::new(&["may"], &["1"]).limit(Some(1));
116116
println!("Querying {:?}", query);
117117
let results = idx.search(&query);
118118
for result in &results {
@@ -121,7 +121,7 @@ fn it_works_with_small_tokens() {
121121
assert_eq!(results.len(), 1);
122122
assert_eq!(results[0].index, 1);
123123

124-
let query = Query::new("may", &["2"]).limit(Some(1));
124+
let query = Query::new(&["may"], &["2"]).limit(Some(1));
125125
println!("Querying {:?}", query);
126126
let results = idx.search(&query);
127127
for result in &results {
@@ -130,7 +130,7 @@ fn it_works_with_small_tokens() {
130130
assert_eq!(results.len(), 1);
131131
assert_eq!(results[0].index, 2);
132132

133-
let query = Query::new("may", &["3"]).limit(Some(1));
133+
let query = Query::new(&["may"], &["3"]).limit(Some(1));
134134
println!("Querying {:?}", query);
135135
let results = idx.search(&query);
136136
for result in &results {
@@ -154,7 +154,7 @@ fn it_behaves_with_repeating_patterns() {
154154
assert!(idx.index.db.contains_key("bca"));
155155
assert!(idx.index.db.contains_key("cab"));
156156

157-
let query = Query::new("abc", &[]).max_distance(Some(3)).limit(Some(3));
157+
let query = Query::new(&["abc"], &[]).max_distance(Some(3)).limit(Some(3));
158158
let results = idx.search(&query);
159159
assert_eq!(results.len(), 1);
160160
assert_eq!(results[0].index, 1);
@@ -166,7 +166,7 @@ fn it_behaves_with_repeating_patterns() {
166166
idx.add_phrase(&repeating_phrase, 1, None).unwrap();
167167
let idx = idx.finish();
168168

169-
let query = Query::new("abc", &[]).limit(Some(3));
169+
let query = Query::new(&["abc"], &[]).limit(Some(3));
170170
let results = idx.search(&query);
171171
assert_eq!(results.len(), 1);
172172
assert_eq!(results[0].index, 1);
@@ -189,7 +189,7 @@ fn it_behaves_with_too_long_inputs() {
189189
assert!(idx.index.db.contains_key("cab"));
190190

191191
println!("Added {}", long_string);
192-
let query = Query::new(&long_string, &[]).limit(Some(3));
192+
let query = Query::new(&[&long_string], &[]).limit(Some(3));
193193
let results = idx.search(&query);
194194
assert_eq!(results.len(), 1);
195195
assert_eq!(results[0].index, 1);
@@ -204,7 +204,7 @@ fn it_behaves_with_too_long_inputs() {
204204
assert_eq!(1, idx.index.db.len());
205205
assert!(idx.index.db.contains_key("abc"));
206206

207-
let query = Query::new("abc", &[]).limit(Some(3));
207+
let query = Query::new(&["abc"], &[]).limit(Some(3));
208208
let results = idx.search(&query);
209209
assert_eq!(results.len(), 1);
210210
assert_eq!(results[0].index, 1);
@@ -220,12 +220,12 @@ fn it_detects_duplicate_phrase_idx() {
220220
assert!(idx.add_phrase("phrase three", 1, None).is_err());
221221
let idx = idx.finish();
222222

223-
let query = Query::new("rather", &[]).limit(Some(3));
223+
let query = Query::new(&["rather"], &[]).limit(Some(3));
224224
let results = idx.search(&query);
225225
assert_eq!(results.len(), 1);
226226
assert_eq!(results[0].index, 1);
227227

228-
let query = Query::new("duplicated", &[]).limit(Some(3));
228+
let query = Query::new(&["duplicated"], &[]).limit(Some(3));
229229
let results = idx.search(&query);
230230
assert_eq!(results.len(), 0);
231231
}

0 commit comments

Comments
 (0)