From a390bbd49b9936aa796fa2d0aa3f23bc157feb97 Mon Sep 17 00:00:00 2001 From: Nytelife26 Date: Sun, 19 Jan 2025 13:24:13 +0000 Subject: [PATCH] refactor: use native dynamic dispatch The previous emulation was inefficient and inconsistent. Now, the signature of all check functions is the same, and a layer of indirection is removed. In the future, this may allow for combining several specifications into one check. --- proselint-rs/Cargo.lock | 269 +++++++++++++++ proselint-rs/Cargo.toml | 6 +- .../proselint-checks/src/annotations.rs | 4 +- .../crates/proselint-checks/src/archaism.rs | 4 +- .../proselint-checks/src/cliches/hell.rs | 4 +- .../proselint-checks/src/cliches/misc.rs | 18 +- .../src/consistency/spacing.rs | 4 +- .../src/consistency/spelling.rs | 4 +- .../proselint-checks/src/dates_times/am_pm.rs | 10 +- .../proselint-checks/src/dates_times/dates.rs | 12 +- .../crates/proselint-checks/src/hedging.rs | 4 +- .../src/industrial_language/airlinese.rs | 4 +- .../src/industrial_language/bureaucratese.rs | 4 +- .../src/industrial_language/chatspeak.rs | 4 +- .../src/industrial_language/commercialese.rs | 6 +- .../industrial_language/corporate_speak.rs | 4 +- .../src/industrial_language/jargon.rs | 4 +- .../proselint-checks/src/lexical_illusions.rs | 4 +- .../proselint-checks/src/malapropisms.rs | 4 +- .../proselint-checks/src/misc/apologizing.rs | 4 +- .../src/misc/back_formations.rs | 4 +- .../proselint-checks/src/misc/braces.rs | 4 +- .../crates/proselint-checks/src/misc/but.rs | 4 +- .../src/misc/capitalization.rs | 22 +- .../proselint-checks/src/misc/composition.rs | 6 +- .../proselint-checks/src/misc/currency.rs | 4 +- .../proselint-checks/src/misc/debased.rs | 4 +- .../proselint-checks/src/misc/greylist.rs | 6 +- .../proselint-checks/src/misc/illogic.rs | 8 +- .../crates/proselint-registry/Cargo.toml | 4 +- .../crates/proselint-registry/src/checks.rs | 309 +----------------- .../proselint-registry/src/checks/types.rs | 283 ++++++++++++++++ proselint-rs/crates/proselint/src/utils.rs | 9 +- 33 files changed, 659 insertions(+), 385 deletions(-) create mode 100644 proselint-rs/crates/proselint-registry/src/checks/types.rs diff --git a/proselint-rs/Cargo.lock b/proselint-rs/Cargo.lock index 1fe5db5ba..960dbe595 100644 --- a/proselint-rs/Cargo.lock +++ b/proselint-rs/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.14" @@ -66,6 +72,12 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + [[package]] name = "base64" version = "0.21.7" @@ -96,12 +108,51 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.4" @@ -184,6 +235,42 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-deque" version = "0.8.5" @@ -209,6 +296,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "dirs" version = "5.0.1" @@ -264,6 +357,16 @@ dependencies = [ "wasi", ] +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -276,6 +379,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "indexmap" version = "2.2.6" @@ -287,18 +396,48 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "js-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -321,6 +460,12 @@ dependencies = [ "libc", ] +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + [[package]] name = "memchr" version = "2.7.2" @@ -343,6 +488,27 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "option-ext" version = "0.2.0" @@ -397,6 +563,34 @@ dependencies = [ "siphasher", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "proc-macro2" version = "1.0.84" @@ -440,6 +634,7 @@ dependencies = [ name = "proselint-registry" version = "0.1.0" dependencies = [ + "criterion", "fancy-regex", "phf", "regex", @@ -641,6 +836,16 @@ dependencies = [ "syn", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.8.14" @@ -710,6 +915,70 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" + +[[package]] +name = "web-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi-util" version = "0.1.9" diff --git a/proselint-rs/Cargo.toml b/proselint-rs/Cargo.toml index 394758617..ff955cc53 100644 --- a/proselint-rs/Cargo.toml +++ b/proselint-rs/Cargo.toml @@ -8,7 +8,11 @@ edition = "2021" license = "MIT" [workspace.dependencies] +# Internal proselint-checks = { path = "crates/proselint-checks" } proselint-registry = { path = "crates/proselint-registry" } -regex = "1.10.5" +# External +criterion = "0.5.1" fancy-regex = "0.13.0" +phf = { version = "0.11.2", features = ["macros"] } +regex = "1.10.5" diff --git a/proselint-rs/crates/proselint-checks/src/annotations.rs b/proselint-rs/crates/proselint-checks/src/annotations.rs index 66b9661d1..672cf9fde 100644 --- a/proselint-rs/crates/proselint-checks/src/annotations.rs +++ b/proselint-rs/crates/proselint-checks/src/annotations.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, Padding, types::*}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -9,7 +9,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "FIXME", "FIX ME", diff --git a/proselint-rs/crates/proselint-checks/src/archaism.rs b/proselint-rs/crates/proselint-checks/src/archaism.rs index 572c4cb0a..de63409e1 100644 --- a/proselint-rs/crates/proselint-checks/src/archaism.rs +++ b/proselint-rs/crates/proselint-checks/src/archaism.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -7,7 +7,7 @@ pub const EXAMPLES_PASS: &[&str] = &[ pub const EXAMPLES_FAIL: &[&str] = &["I want to sleep, perchance to dream."]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "alack", "anent", diff --git a/proselint-rs/crates/proselint-checks/src/cliches/hell.rs b/proselint-rs/crates/proselint-checks/src/cliches/hell.rs index fc600d308..1aaad665b 100644 --- a/proselint-rs/crates/proselint-checks/src/cliches/hell.rs +++ b/proselint-rs/crates/proselint-checks/src/cliches/hell.rs @@ -1,10 +1,10 @@ -use proselint_registry::checks::{Check, CheckType, CheckFlags}; +use proselint_registry::checks::{Check, types::*, CheckFlags}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &["I was at xyz and then all hell broke loose."]; const CHECK: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: "all hell broke loose", exceptions: &[], }, diff --git a/proselint-rs/crates/proselint-checks/src/cliches/misc.rs b/proselint-rs/crates/proselint-checks/src/cliches/misc.rs index 8591e8f57..c8276b80c 100644 --- a/proselint-rs/crates/proselint-checks/src/cliches/misc.rs +++ b/proselint-rs/crates/proselint-checks/src/cliches/misc.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -32,7 +32,7 @@ const MSG: &str = "'{}' is a cliché."; // source: Garner's Modern American Usage // source_url: http://bit.ly/1T4alrY const CHECK_GARNER: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "a fate worse than death", "alas and alack", @@ -125,7 +125,7 @@ const CHECK_GARNER: Check = Check { // source: write-good // source_url: https://github.com/btford/write-good const CHECK_WRITE_GOOD_A_TO_C: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "a chip off the old block", "a clean slate", @@ -289,7 +289,7 @@ const CHECK_WRITE_GOOD_A_TO_C: Check = Check { }; pub const CHECK_WRITE_GOOD_D_TO_J: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "dark before the dawn", "day in, day out", @@ -472,7 +472,7 @@ pub const CHECK_WRITE_GOOD_D_TO_J: Check = Check { // source: write-good // source_url: https://github.com/btford/write-good pub const CHECK_WRITE_GOOD_K_TO_O: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "keep a stiff upper lip", "keep an eye on", @@ -638,7 +638,7 @@ pub const CHECK_WRITE_GOOD_K_TO_O: Check = Check { // source: write-good // source_url: https://github.com/btford/write-good const CHECK_WRITE_GOOD_P_TO_S: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "pain and suffering", "pain in the", @@ -771,7 +771,7 @@ const CHECK_WRITE_GOOD_P_TO_S: Check = Check { // source: write-good // source_url: https://github.com/btford/write-good const CHECK_WRITE_GOOD_T_TO_Z: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "take a load off", "take one for the team", @@ -890,7 +890,7 @@ const CHECK_WRITE_GOOD_T_TO_Z: Check = Check { // source: GNU diction // source_url: https://directory.fsf.org/wiki/Diction const CHECK_GNU_DICTION: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "a matter of concern", "all things being equal", @@ -931,7 +931,7 @@ const CHECK_GNU_DICTION: Check = Check { // source: Nigel Ree's Dictionary of Cliches // source_url: bit.ly/3sL091j const CHECK_NIGEL: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "abhors a vacuum", "accident waiting to happen", diff --git a/proselint-rs/crates/proselint-checks/src/consistency/spacing.rs b/proselint-rs/crates/proselint-checks/src/consistency/spacing.rs index b5a267e9f..7d40ee609 100644 --- a/proselint-rs/crates/proselint-checks/src/consistency/spacing.rs +++ b/proselint-rs/crates/proselint-checks/src/consistency/spacing.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType}; +use proselint_registry::checks::{Check, types::*}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -7,7 +7,7 @@ pub const EXAMPLES_PASS: &[&str] = &[ pub const EXAMPLES_FAIL: &[&str] = &["This is bad. Not consistent. At all."]; const CHECK: Check = Check { - check_type: CheckType::Consistency { + check_type: &Consistency { word_pairs: &[[r"[\.\?!] [A-Z]", r"[\.\?!] [A-Z]"]] }, path: "consistency.spacing", diff --git a/proselint-rs/crates/proselint-checks/src/consistency/spelling.rs b/proselint-rs/crates/proselint-checks/src/consistency/spelling.rs index be312efde..946498936 100644 --- a/proselint-rs/crates/proselint-checks/src/consistency/spelling.rs +++ b/proselint-rs/crates/proselint-checks/src/consistency/spelling.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType}; +use proselint_registry::checks::{Check, types::*}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -11,7 +11,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ const CHECK: Check = Check { // TODO: add more BE, UE, even generalize [a-z]+(ize|ized|izing)? - check_type: CheckType::Consistency { word_pairs: &[ + check_type: &Consistency { word_pairs: &[ ["advisor", "adviser"], // ["analyse", "analyze"], ["centre", "center"], diff --git a/proselint-rs/crates/proselint-checks/src/dates_times/am_pm.rs b/proselint-rs/crates/proselint-checks/src/dates_times/am_pm.rs index 33a35938b..a0ac6c5a7 100644 --- a/proselint-rs/crates/proselint-checks/src/dates_times/am_pm.rs +++ b/proselint-rs/crates/proselint-checks/src/dates_times/am_pm.rs @@ -1,6 +1,6 @@ use const_format::str_replace; -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -19,7 +19,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK_LOWERCASE_PERIODS: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: str_replace!(Padding::WordsInText.as_str(), "{}", r"\d{1,2} ?[ap]m"), exceptions: &[], }, @@ -30,7 +30,7 @@ const CHECK_LOWERCASE_PERIODS: Check = Check { }; const CHECK_SPACING: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: r"\b\d{1,2}[ap]\.?m\.?", exceptions: &[], }, @@ -40,7 +40,7 @@ const CHECK_SPACING: Check = Check { }; const CHECK_MIDNIGHT_NOON: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: r"\b12 ?[ap]\.?m\.?", exceptions: &[], }, @@ -50,7 +50,7 @@ const CHECK_MIDNIGHT_NOON: Check = Check { }; const CHECK_REDUNDANCY: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ r"\b\d{1,2} ?a\.?m\.? in the morning", r"\b\d{1,2} ?p\.?m\.? in the evening", diff --git a/proselint-rs/crates/proselint-checks/src/dates_times/dates.rs b/proselint-rs/crates/proselint-checks/src/dates_times/dates.rs index 002068895..27a82a8c2 100644 --- a/proselint-rs/crates/proselint-checks/src/dates_times/dates.rs +++ b/proselint-rs/crates/proselint-checks/src/dates_times/dates.rs @@ -1,4 +1,4 @@ -use proselint_registry::{pad, checks::{Check, CheckType, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, Padding}}; use const_format::concatcp; pub const EXAMPLES_PASS: &[&str] = &[ @@ -22,7 +22,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK_DECADES_APOSTROPHES_SHORT: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, r"\d0\'s"), exceptions: &[], }, @@ -32,7 +32,7 @@ const CHECK_DECADES_APOSTROPHES_SHORT: Check = Check { }; const CHECK_DECADES_APOSTROPHES_LONG: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, r"\d\d\d0\'s"), exceptions: &[], }, @@ -42,7 +42,7 @@ const CHECK_DECADES_APOSTROPHES_LONG: Check = Check { }; const CHECK_DASH_AND_FROM: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, r"from \d+[^ \t\n\r\f\v\w.]\d+"), exceptions: &[], }, @@ -58,7 +58,7 @@ const MONTHS_SEPARATED: &str = |August|September|October|November|December"; const CHECK_MONTH_YEAR_COMMA: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: concatcp!(pad!(Padding::SafeJoin, MONTHS_SEPARATED), r", \d{3,}"), exceptions: &[], }, @@ -68,7 +68,7 @@ const CHECK_MONTH_YEAR_COMMA: Check = Check { }; const CHECK_MONTH_OF_YEAR: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: concatcp!(pad!(Padding::SafeJoin, MONTHS_SEPARATED), r"of \d{3,}"), exceptions: &[], }, diff --git a/proselint-rs/crates/proselint-checks/src/hedging.rs b/proselint-rs/crates/proselint-checks/src/hedging.rs index 2b2d346a7..e3e6a493a 100644 --- a/proselint-rs/crates/proselint-checks/src/hedging.rs +++ b/proselint-rs/crates/proselint-checks/src/hedging.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &[ @@ -7,7 +7,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "I would argue that", "so to speak", diff --git a/proselint-rs/crates/proselint-checks/src/industrial_language/airlinese.rs b/proselint-rs/crates/proselint-checks/src/industrial_language/airlinese.rs index 61ef7f865..861772af7 100644 --- a/proselint-rs/crates/proselint-checks/src/industrial_language/airlinese.rs +++ b/proselint-rs/crates/proselint-checks/src/industrial_language/airlinese.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -11,7 +11,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "enplan(?:e|ed|ing|ement)", "deplan(?:e|ed|ing|ement)", diff --git a/proselint-rs/crates/proselint-checks/src/industrial_language/bureaucratese.rs b/proselint-rs/crates/proselint-checks/src/industrial_language/bureaucratese.rs index 5edb48e4a..97c3fbb92 100644 --- a/proselint-rs/crates/proselint-checks/src/industrial_language/bureaucratese.rs +++ b/proselint-rs/crates/proselint-checks/src/industrial_language/bureaucratese.rs @@ -1,4 +1,4 @@ -use proselint_registry::{pad, checks::{Check, CheckType, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, Padding}}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &[ @@ -6,7 +6,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, r"(met|meets?) with your approval"), exceptions: &[], }, diff --git a/proselint-rs/crates/proselint-checks/src/industrial_language/chatspeak.rs b/proselint-rs/crates/proselint-checks/src/industrial_language/chatspeak.rs index d5f28d998..158eff85d 100644 --- a/proselint-rs/crates/proselint-checks/src/industrial_language/chatspeak.rs +++ b/proselint-rs/crates/proselint-checks/src/industrial_language/chatspeak.rs @@ -1,10 +1,10 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &["BRB getting coffee."]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "2day", "4U", diff --git a/proselint-rs/crates/proselint-checks/src/industrial_language/commercialese.rs b/proselint-rs/crates/proselint-checks/src/industrial_language/commercialese.rs index 336caeca6..54f760e21 100644 --- a/proselint-rs/crates/proselint-checks/src/industrial_language/commercialese.rs +++ b/proselint-rs/crates/proselint-checks/src/industrial_language/commercialese.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; use phf::phf_map; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; @@ -8,7 +8,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "acknowledging yours of", "beg to advise", @@ -43,7 +43,7 @@ const CHECK: Check = Check { }; const CHECK_ABBREV: Check = Check { - check_type: CheckType::PreferredForms { + check_type: &PreferredForms { items: &phf_map!( r"inst\." => "this month", r"prox\." => "next month", diff --git a/proselint-rs/crates/proselint-checks/src/industrial_language/corporate_speak.rs b/proselint-rs/crates/proselint-checks/src/industrial_language/corporate_speak.rs index d002dc0c8..625eba832 100644 --- a/proselint-rs/crates/proselint-checks/src/industrial_language/corporate_speak.rs +++ b/proselint-rs/crates/proselint-checks/src/industrial_language/corporate_speak.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -7,7 +7,7 @@ pub const EXAMPLES_PASS: &[&str] = &[ pub const EXAMPLES_FAIL: &[&str] = &["We will circle back around to it."]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "at the end of the day", "back to the drawing board", diff --git a/proselint-rs/crates/proselint-checks/src/industrial_language/jargon.rs b/proselint-rs/crates/proselint-checks/src/industrial_language/jargon.rs index 177547caf..118547b35 100644 --- a/proselint-rs/crates/proselint-checks/src/industrial_language/jargon.rs +++ b/proselint-rs/crates/proselint-checks/src/industrial_language/jargon.rs @@ -1,10 +1,10 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &["I agree it's in the affirmative."]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "in the affirmative", "in the negative", diff --git a/proselint-rs/crates/proselint-checks/src/lexical_illusions.rs b/proselint-rs/crates/proselint-checks/src/lexical_illusions.rs index 10eb50eb6..f97a99f90 100644 --- a/proselint-rs/crates/proselint-checks/src/lexical_illusions.rs +++ b/proselint-rs/crates/proselint-checks/src/lexical_illusions.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType}; +use proselint_registry::checks::{Check, types::*}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -23,7 +23,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { // NOTE: this can't be padded without mod -> \1 pattern: r"\b(? "improper"), padding: Padding::WordsInText, }, diff --git a/proselint-rs/crates/proselint-checks/src/misc/braces.rs b/proselint-rs/crates/proselint-checks/src/misc/braces.rs index 0d9287538..bc032ffc0 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/braces.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/braces.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, CheckResult}; +use proselint_registry::checks::{Check, CheckResult}; use regex::Regex; pub const EXAMPLES_PASS: &[&str] = &[]; @@ -41,7 +41,7 @@ fn check_unmatched(text: &str, spec: &Check) -> Vec { } const CHECK: Check = Check { - check_type: CheckType::CheckFn(&check_unmatched), + check_type: &check_unmatched, path: "misc.braces.unmatched", msg: "Match braces:", ..Check::default() diff --git a/proselint-rs/crates/proselint-checks/src/misc/but.rs b/proselint-rs/crates/proselint-checks/src/misc/but.rs index ae8c07c5b..2547b0622 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/but.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/but.rs @@ -1,4 +1,4 @@ -use proselint_registry::{pad, checks::{Check, CheckType, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, Padding}}; pub const EXAMPLES_PASS: &[&str] = &[ "Smoke phrase with nothing flagged.", @@ -14,7 +14,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, r"(?:^|[\.!\?]\s*)But"), exceptions: &[], }, diff --git a/proselint-rs/crates/proselint-checks/src/misc/capitalization.rs b/proselint-rs/crates/proselint-checks/src/misc/capitalization.rs index 14be371a0..8e67e4a37 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/capitalization.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/capitalization.rs @@ -1,4 +1,4 @@ -use proselint_registry::{pad, checks::{Check, CheckType, CheckResult, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, CheckResult, Padding}}; use phf::phf_map; pub const EXAMPLES_PASS: &[&str] = &[ @@ -23,7 +23,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK_TERMS: Check = Check { - check_type: CheckType::PreferredFormsSimple { + check_type: &PreferredFormsSimple { items: &phf_map!( "stone age" => "Stone Age", "Space Age" => "space age", @@ -39,7 +39,7 @@ const CHECK_TERMS: Check = Check { }; const CHECK_SEASONS: Check = Check { - check_type: CheckType::PreferredFormsSimple { + check_type: &PreferredFormsSimple { items: &phf_map!( "Winter" => "winter", "Fall" => "fall", @@ -58,7 +58,7 @@ const CHECK_MONTHS: Check = Check { // too many false positives: may, march // TODO: deal with collisions // i.e. "(you|he|...) may proceed" follows a pattern - check_type: CheckType::PreferredFormsSimple { + check_type: &PreferredFormsSimple { items: &phf_map!( "january" => "January", "february" => "February", @@ -80,7 +80,7 @@ const CHECK_MONTHS: Check = Check { }; const CHECK_DAYS: Check = Check { - check_type: CheckType::PreferredFormsSimple { + check_type: &PreferredFormsSimple { items: &phf_map!( "monday" => "Monday", "tuesday" => "Tuesday", @@ -99,7 +99,7 @@ const CHECK_DAYS: Check = Check { }; const CHECK_ROMAN_WAR: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: r"World War ((I*i+)|(i+I*))+", exceptions: &[], }, @@ -112,7 +112,7 @@ const CHECK_ROMAN_WAR: Check = Check { // TODO: this could be tidier if a filter flag existed // it also should not be split into two checks - refactor if possible const _CHECK_ROMAN_NUMERALS: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!( Padding::WordsInText, r"M{0,3}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})", @@ -126,19 +126,19 @@ const _CHECK_ROMAN_NUMERALS: Check = Check { }; const NUMERALS: [char; 7] = ['m', 'd', 'c', 'l', 'x', 'v', 'i']; -fn check_roman_numerals(text: &str, _check: &Check) -> Vec { +fn check_roman_numerals(text: &str, check: &Check) -> Vec { _CHECK_ROMAN_NUMERALS - .dispatch(text) + .check(text, check) .into_iter() .filter(|result| { let item = text[result.start_pos..result.end_pos].trim(); - item.len() > 0 && item.chars().any(|x| NUMERALS.contains(&x)) + !item.is_empty() && item.chars().any(|x| NUMERALS.contains(&x)) }) .collect() } const CHECK_ROMAN_NUMERALS: Check = Check { - check_type: CheckType::CheckFn(&check_roman_numerals), + check_type: &check_roman_numerals, path: "misc.capitalization.roman_num", msg: "Capitalize the roman numeral '{}'.", ..Check::default() diff --git a/proselint-rs/crates/proselint-checks/src/misc/composition.rs b/proselint-rs/crates/proselint-checks/src/misc/composition.rs index f87a86bf3..e2fd7a73e 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/composition.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/composition.rs @@ -1,4 +1,4 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; use phf::phf_map; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; @@ -13,7 +13,7 @@ const NAME: &str = "misc.composition"; const MSG: &str = "Try '{}' instead of '{}'."; const CHECK: Check = Check { - check_type: CheckType::PreferredFormsSimple { + check_type: &PreferredFormsSimple { items: &phf_map!( // Put statements in positive form "not honest" => "dishonest", @@ -44,7 +44,7 @@ const CHECK: Check = Check { }; const CHECK_REGEX: Check = Check { - check_type: CheckType::PreferredForms { + check_type: &PreferredForms { items: &phf_map!( r"did not pay (any )?attention to" => "ignored", r"(had )?not succeeded" => "failed", diff --git a/proselint-rs/crates/proselint-checks/src/misc/currency.rs b/proselint-rs/crates/proselint-checks/src/misc/currency.rs index d006f5c26..c2c676d9d 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/currency.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/currency.rs @@ -1,10 +1,10 @@ -use proselint_registry::{pad, checks::{Check, CheckType, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, Padding}}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &["It cost $10 dollars."]; const CHECK: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!( Padding::SeparatorInText, r"\$[\d]* ?(?:dollars|usd|us dollars)" diff --git a/proselint-rs/crates/proselint-checks/src/misc/debased.rs b/proselint-rs/crates/proselint-checks/src/misc/debased.rs index 4b4cad533..91a335d35 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/debased.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/debased.rs @@ -1,10 +1,10 @@ -use proselint_registry::checks::{Check, CheckType, Padding}; +use proselint_registry::checks::{Check, types::*, Padding}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &["This leaves much to be desired."]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "a not unjustifiable assumption", "leaves much to be desired", diff --git a/proselint-rs/crates/proselint-checks/src/misc/greylist.rs b/proselint-rs/crates/proselint-checks/src/misc/greylist.rs index 22d590d82..6f291df5b 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/greylist.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/greylist.rs @@ -1,4 +1,4 @@ -use proselint_registry::{pad, checks::{Check, CheckType, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, Padding}}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &[ @@ -9,7 +9,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK_OBVIOUSLY: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, "obviously"), exceptions: &[], }, @@ -19,7 +19,7 @@ const CHECK_OBVIOUSLY: Check = Check { }; const CHECK_UTILIZE: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, "utilize"), exceptions: &[], }, diff --git a/proselint-rs/crates/proselint-checks/src/misc/illogic.rs b/proselint-rs/crates/proselint-checks/src/misc/illogic.rs index da2dabfd9..f94887b52 100644 --- a/proselint-rs/crates/proselint-checks/src/misc/illogic.rs +++ b/proselint-rs/crates/proselint-checks/src/misc/illogic.rs @@ -1,4 +1,4 @@ -use proselint_registry::{pad, checks::{Check, CheckType, Padding}}; +use proselint_registry::{pad, checks::{Check, types::*, Padding}}; pub const EXAMPLES_PASS: &[&str] = &["Smoke phrase with nothing flagged."]; pub const EXAMPLES_FAIL: &[&str] = &[ @@ -9,7 +9,7 @@ pub const EXAMPLES_FAIL: &[&str] = &[ ]; const CHECK: Check = Check { - check_type: CheckType::Existence { + check_type: &Existence { items: &[ "preplan", r"more than .{1,10} all", @@ -30,7 +30,7 @@ const CHECK: Check = Check { }; const CHECK_COIN: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, "to coin a phrase from"), exceptions: &[], }, @@ -40,7 +40,7 @@ const CHECK_COIN: Check = Check { }; const CHECK_COLLUSION: Check = Check { - check_type: CheckType::ExistenceSimple { + check_type: &ExistenceSimple { pattern: pad!(Padding::WordsInText, "without your collusion"), exceptions: &[], }, diff --git a/proselint-rs/crates/proselint-registry/Cargo.toml b/proselint-rs/crates/proselint-registry/Cargo.toml index c0120aae3..936940780 100644 --- a/proselint-rs/crates/proselint-registry/Cargo.toml +++ b/proselint-rs/crates/proselint-registry/Cargo.toml @@ -4,9 +4,7 @@ version.workspace = true edition.workspace = true license.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -phf = "0.11.2" +phf = { workspace = true } regex = { workspace = true } fancy-regex = { workspace = true } diff --git a/proselint-rs/crates/proselint-registry/src/checks.rs b/proselint-rs/crates/proselint-registry/src/checks.rs index 4cd695731..324eeca26 100644 --- a/proselint-rs/crates/proselint-registry/src/checks.rs +++ b/proselint-rs/crates/proselint-registry/src/checks.rs @@ -1,4 +1,6 @@ -use regex::{Match, Regex, RegexBuilder}; +use types::CheckType; + +pub mod types; #[derive(Clone, Copy, Debug, Default)] pub enum Padding { @@ -68,248 +70,6 @@ pub struct CheckResult { pub replacements: Option, } -pub trait CheckFn: Fn(&str, &Check) -> Vec + Send + Sync {} - -impl Vec + Send + Sync> CheckFn for T {} - -#[derive(Clone, Copy)] -pub enum CheckType { - Consistency { - word_pairs: &'static [[&'static str; 2]], - }, - PreferredForms { - items: &'static phf::Map<&'static str, &'static str>, - padding: Padding, - }, - PreferredFormsSimple { - items: &'static phf::Map<&'static str, &'static str>, - padding: Padding, - }, - Existence { - items: &'static [&'static str], - padding: Padding, - exceptions: &'static [&'static str], - }, - ExistenceSimple { - pattern: &'static str, - exceptions: &'static [&'static str], - }, - ReverseExistence { - allowed: &'static [&'static str], - }, - CheckFn(&'static dyn CheckFn), -} - -impl CheckType { - fn consistency( - text: &str, - check: &Check, - word_pairs: &[[&str; 2]], - ) -> Vec { - let mut results: Vec = vec![]; - - for pair in word_pairs { - let matches: [Vec; 2] = pair - .iter() - .map(|part| { - RegexBuilder::new(part) - .case_insensitive(check.ignore_case) - .build() - .unwrap() - .find_iter(text) - .collect::>() - }) - .collect::>() - .try_into() - .unwrap(); - - let idx_minority = (matches[0].len() > matches[1].len()) as usize; - results.extend(matches[idx_minority].iter().map(|m| { - CheckResult { - start_pos: m.start() + check.offset[0], - end_pos: m.end() + check.offset[1], - check_name: check.path, - message: check.msg.to_string(), - replacements: Some(pair[0].to_string()), - } - })); - } - results - } - - fn preferred_forms( - text: &str, - check: &Check, - items: &phf::Map<&str, &str>, - padding: Padding, - ) -> Vec { - let offset = padding.to_offset_from(check.offset); - - // TODO: benchmark replacing this with RegexSet - items - .entries() - .flat_map(|(original, replacement)| { - RegexBuilder::new(&padding.pad(original)) - .case_insensitive(check.ignore_case) - .build() - .unwrap() - .find_iter(text) - .map(|m| CheckResult { - start_pos: m.start() + offset[0], - end_pos: m.end() + offset[1], - check_name: check.path, - message: check.msg.to_string(), - replacements: Some(replacement.to_string()), - }) - .collect::>() - }) - .collect() - } - - fn preferred_forms_simple( - text: &str, - check: &Check, - items: &phf::Map<&str, &str>, - padding: Padding, - ) -> Vec { - let offset = padding.to_offset_from(check.offset); - - let rx = &padding.pad( - &(if items.len() > 1 { - Padding::SafeJoin.pad( - &items.keys().map(|x| *x).collect::>().join("|"), - ) - } else { - items.keys().next().unwrap().to_string() - }), - ); - - // TODO: benchmark replacing this with RegexSet - RegexBuilder::new(rx) - .case_insensitive(check.ignore_case) - .build() - .unwrap() - .find_iter(text) - .map(|m| { - let original = m.as_str().trim(); - let replacements = - items.get(original).map(|entry| entry.to_string()); - - CheckResult { - start_pos: m.start() + offset[0], - end_pos: m.end() + offset[1], - check_name: check.path, - message: check.msg.to_string(), - replacements, - } - }) - .collect() - } - - fn existence( - text: &str, - check: &Check, - items: &[&str], - padding: Padding, - exceptions: &[&str], - ) -> Vec { - let offset = padding.to_offset_from(check.offset); - - let rx = &padding.pad( - &(if items.len() > 1 { - Padding::SafeJoin.pad(&items.join("|")) - } else { - items.first().unwrap().to_string() - }), - ); - - let regex_exceptions = exceptions - .iter() - .map(|exception| { - RegexBuilder::new(exception) - .case_insensitive(check.ignore_case) - .build() - .unwrap() - }) - .collect::>(); - - regex::RegexBuilder::new(rx) - .case_insensitive(check.ignore_case) - .build() - .unwrap() - .find_iter(text) - .filter_map(|m| { - let match_text = m.as_str().trim(); - (!regex_exceptions - .iter() - .any(|exception| exception.is_match(match_text))) - .then(|| CheckResult { - start_pos: m.start() + offset[0], - end_pos: m.end() + offset[1], - check_name: check.path, - message: check.msg.to_string(), - replacements: None, - }) - }) - .collect() - } - - fn existence_simple( - text: &str, - check: &Check, - pattern: &str, - exceptions: &[&str], - ) -> Vec { - // TODO: this should be a RegexBuilder with case_insensitive - // held up by fancy-regex#132 - let regex_pattern = fancy_regex::Regex::new(pattern).unwrap(); - let regex_exceptions = exceptions - .iter() - .map(|exception| fancy_regex::Regex::new(exception).unwrap()) - .collect::>(); - regex_pattern - .find_iter(text) - .filter_map(|x| { - x.ok().and_then(|m| { - (!regex_exceptions.iter().any(|exception| { - exception.is_match(m.as_str()).unwrap() - })) - .then(|| CheckResult { - start_pos: m.start(), - end_pos: m.end(), - check_name: check.path, - message: check.msg.to_string(), - replacements: None, - }) - }) - }) - .collect() - } - - fn rev_existence( - text: &str, - check: &Check, - allowed: &[&str], - ) -> Vec { - let tokenizer = Regex::new(r"\w[\w'-]+\w").unwrap(); - tokenizer - .find_iter(text) - .filter_map(|m| { - let match_text = m.as_str(); - (!match_text.chars().any(|c| c.is_ascii_digit()) - && !allowed.contains(&match_text)) - .then(|| CheckResult { - start_pos: m.start() + check.offset[0] + 1, - end_pos: m.end() + check.offset[1], - check_name: check.path, - message: check.msg.to_string(), - replacements: None, - }) - }) - .collect() - } -} - #[derive(Clone, Copy, Debug)] pub struct CheckFlags { pub limit_results: u8, @@ -324,7 +84,7 @@ impl CheckFlags { #[derive(Clone, Copy)] pub struct Check { - pub check_type: CheckType, + pub check_type: &'static dyn CheckType, pub path: &'static str, pub msg: &'static str, pub flags: CheckFlags, @@ -332,10 +92,20 @@ pub struct Check { pub offset: [usize; 2], } +impl CheckType for Check { + fn check(&self, text: &str, _: &Check) -> Vec { + self.check_type.check(text, self) + } +} + +fn default_check_fn(_: &str, _: &Check) -> Vec { + unimplemented!() +} + impl Check { pub const fn default() -> Self { Check { - check_type: CheckType::CheckFn(&|_, _| vec![]), + check_type: &default_check_fn, path: "", msg: "", flags: CheckFlags::default(), @@ -343,53 +113,4 @@ impl Check { offset: [0, 0], } } - - pub fn dispatch(&self, text: &str) -> Vec { - use CheckType::*; - - match self.check_type.to_owned() { - Consistency { word_pairs } => CheckType::consistency( - text, - self, - word_pairs, - ), - PreferredForms { items, padding } => CheckType::preferred_forms( - text, - self, - items, - padding, - ), - PreferredFormsSimple { items, padding } => { - CheckType::preferred_forms_simple( - text, - self, - items, - padding, - ) - } - Existence { items, padding, exceptions } => { - CheckType::existence( - text, - self, - items, - padding, - exceptions, - ) - } - ExistenceSimple { pattern, exceptions } => { - CheckType::existence_simple( - text, - self, - pattern, - exceptions, - ) - } - ReverseExistence { allowed } => CheckType::rev_existence( - text, - self, - allowed, - ), - CheckFn(check_fn) => check_fn(text, self), - } - } } diff --git a/proselint-rs/crates/proselint-registry/src/checks/types.rs b/proselint-rs/crates/proselint-registry/src/checks/types.rs new file mode 100644 index 000000000..94db4e269 --- /dev/null +++ b/proselint-rs/crates/proselint-registry/src/checks/types.rs @@ -0,0 +1,283 @@ +use phf::phf_map; +use regex::{Match, Regex, RegexBuilder}; + +use super::{Check, CheckResult, Padding}; + +pub trait CheckFn: Fn(&str, &Check) -> Vec + Send + Sync {} + +impl Vec + Send + Sync> CheckFn for T {} + +pub trait CheckType { + fn check(&self, text: &str, check: &Check) -> Vec; +} + +impl CheckType for T { + fn check(&self, text: &str, check: &Check) -> Vec { + self(text, check) + } +} + +pub struct Consistency { + pub word_pairs: &'static [[&'static str; 2]], +} + +impl Consistency { + pub const fn default() -> Self { + Consistency { word_pairs: &[] } + } +} + +impl CheckType for Consistency { + fn check(&self, text: &str, check: &Check) -> Vec { + let mut results: Vec = vec![]; + + for pair in self.word_pairs { + let matches: [Vec; 2] = pair + .iter() + .map(|part| { + RegexBuilder::new(part) + .case_insensitive(check.ignore_case) + .build() + .unwrap() + .find_iter(text) + .collect::>() + }) + .collect::>() + .try_into() + .unwrap(); + + let idx_minority = (matches[0].len() > matches[1].len()) as usize; + results.extend(matches[idx_minority].iter().map(|m| { + CheckResult { + start_pos: m.start() + check.offset[0], + end_pos: m.end() + check.offset[1], + check_name: check.path, + message: check.msg.to_string(), + replacements: Some(pair[0].to_string()), + } + })); + } + results + } +} + +pub struct PreferredForms { + pub items: &'static phf::Map<&'static str, &'static str>, + pub padding: Padding, +} + +impl PreferredForms { + pub const fn default() -> Self { + PreferredForms { items: &phf_map! {}, padding: Padding::WordsInText } + } +} + +impl CheckType for PreferredForms { + fn check(&self, text: &str, check: &Check) -> Vec { + let offset = self.padding.to_offset_from(check.offset); + + // TODO: benchmark replacing this with RegexSet + self.items + .entries() + .flat_map(|(original, replacement)| { + RegexBuilder::new(&self.padding.pad(original)) + .case_insensitive(check.ignore_case) + .build() + .unwrap() + .find_iter(text) + .map(|m| CheckResult { + start_pos: m.start() + offset[0], + end_pos: m.end() + offset[1], + check_name: check.path, + message: check.msg.to_string(), + replacements: Some(replacement.to_string()), + }) + .collect::>() + }) + .collect() + } +} + +pub struct PreferredFormsSimple { + pub items: &'static phf::Map<&'static str, &'static str>, + pub padding: Padding, +} + +impl PreferredFormsSimple { + pub const fn default() -> Self { + PreferredFormsSimple { + items: &phf_map! {}, + padding: Padding::WordsInText, + } + } +} + +impl CheckType for PreferredFormsSimple { + fn check(&self, text: &str, check: &Check) -> Vec { + let offset = self.padding.to_offset_from(check.offset); + + let rx = &self.padding.pad( + &(if self.items.len() > 1 { + Padding::SafeJoin.pad( + &self.items.keys().copied().collect::>().join("|"), + ) + } else { + self.items.keys().next().unwrap().to_string() + }), + ); + + // TODO: benchmark replacing this with RegexSet + RegexBuilder::new(rx) + .case_insensitive(check.ignore_case) + .build() + .unwrap() + .find_iter(text) + .map(|m| { + let original = m.as_str().trim(); + let replacements = + self.items.get(original).map(|entry| entry.to_string()); + + CheckResult { + start_pos: m.start() + offset[0], + end_pos: m.end() + offset[1], + check_name: check.path, + message: check.msg.to_string(), + replacements, + } + }) + .collect() + } +} + +pub struct Existence { + pub items: &'static [&'static str], + pub padding: Padding, + pub exceptions: &'static [&'static str], +} + +impl Existence { + pub const fn default() -> Self { + Existence { + items: &[], + padding: Padding::WordsInText, + exceptions: &[], + } + } +} + +impl CheckType for Existence { + fn check(&self, text: &str, check: &Check) -> Vec { + let offset = self.padding.to_offset_from(check.offset); + + let rx = &self.padding.pad( + &(if self.items.len() > 1 { + Padding::SafeJoin.pad(&self.items.join("|")) + } else { + self.items.first().unwrap().to_string() + }), + ); + + let regex_exceptions = self + .exceptions + .iter() + .map(|exception| { + RegexBuilder::new(exception) + .case_insensitive(check.ignore_case) + .build() + .unwrap() + }) + .collect::>(); + + regex::RegexBuilder::new(rx) + .case_insensitive(check.ignore_case) + .build() + .unwrap() + .find_iter(text) + .filter_map(|m| { + let match_text = m.as_str().trim(); + (!regex_exceptions + .iter() + .any(|exception| exception.is_match(match_text))) + .then(|| CheckResult { + start_pos: m.start() + offset[0], + end_pos: m.end() + offset[1], + check_name: check.path, + message: check.msg.to_string(), + replacements: None, + }) + }) + .collect() + } +} + +pub struct ExistenceSimple { + pub pattern: &'static str, + pub exceptions: &'static [&'static str], +} + +impl ExistenceSimple { + pub const fn default() -> Self { + ExistenceSimple { pattern: "", exceptions: &[] } + } +} + +impl CheckType for ExistenceSimple { + fn check(&self, text: &str, check: &Check) -> Vec { + // TODO: this should be a RegexBuilder with case_insensitive + // held up by fancy-regex#132 + let regex_pattern = fancy_regex::Regex::new(self.pattern).unwrap(); + let regex_exceptions = self + .exceptions + .iter() + .map(|exception| fancy_regex::Regex::new(exception).unwrap()) + .collect::>(); + regex_pattern + .find_iter(text) + .filter_map(|x| { + x.ok().and_then(|m| { + (!regex_exceptions.iter().any(|exception| { + exception.is_match(m.as_str()).unwrap() + })) + .then(|| CheckResult { + start_pos: m.start(), + end_pos: m.end(), + check_name: check.path, + message: check.msg.to_string(), + replacements: None, + }) + }) + }) + .collect() + } +} + +pub struct ReverseExistence { + pub allowed: &'static [&'static str], +} + +impl ReverseExistence { + pub const fn default() -> Self { + ReverseExistence { allowed: &[] } + } +} + +impl CheckType for ReverseExistence { + fn check(&self, text: &str, check: &Check) -> Vec { + let tokenizer = Regex::new(r"\w[\w'-]+\w").unwrap(); + tokenizer + .find_iter(text) + .filter_map(|m| { + let match_text = m.as_str(); + (!match_text.chars().any(|c| c.is_ascii_digit()) + && !self.allowed.contains(&match_text)) + .then(|| CheckResult { + start_pos: m.start() + check.offset[0] + 1, + end_pos: m.end() + check.offset[1], + check_name: check.path, + message: check.msg.to_string(), + replacements: None, + }) + }) + .collect() + } +} diff --git a/proselint-rs/crates/proselint/src/utils.rs b/proselint-rs/crates/proselint/src/utils.rs index 51843adca..202af5ecd 100644 --- a/proselint-rs/crates/proselint/src/utils.rs +++ b/proselint-rs/crates/proselint/src/utils.rs @@ -1,8 +1,8 @@ use std::{collections::HashMap, path::PathBuf}; -use proselint_registry::checks::{Check, LintResult}; -use walkdir::WalkDir; +use proselint_registry::checks::{types::CheckType, Check, LintResult}; use rayon::prelude::*; +use walkdir::WalkDir; use crate::config::base::Config; @@ -71,7 +71,7 @@ pub fn is_quoted(pos: usize, text: &str) -> bool { pub fn run_check(check: Check, text: &str, source: &str) -> Vec { check - .dispatch(text) + .check(text) .iter() .filter_map(|result| { let (line, column) = get_line_and_column(text, result.start_pos); @@ -120,8 +120,7 @@ pub fn extract_files(paths: Vec) -> Vec { let mut expanded_files: Vec = vec![]; for path in paths { if path.is_dir() { - for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) - { + for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) { if entry.path().extension().is_some_and(|ext| { VALID_EXTENSIONS.contains(&ext.to_str().unwrap()) }) {