diff --git a/.github/workflows/external-tests.yml b/.github/workflows/external-tests.yml index 0753ddb2..628a6cea 100644 --- a/.github/workflows/external-tests.yml +++ b/.github/workflows/external-tests.yml @@ -111,10 +111,12 @@ jobs: shell: bash run: make test-version - - name: Run bun tests + - name: Run non windows tests shell: bash if: ${{ matrix.os != 'windows-latest' }} - run: make test-bun + run: | + make test-bun + make test-c-api - name: Install Node.js if: ${{ matrix.os != 'ubuntu-latest' }} diff --git a/Cargo.lock b/Cargo.lock index f2deab2f..b49f3c3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.70.1" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ "bitflags 2.11.0", "cexpr", @@ -154,7 +154,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash 1.1.0", + "rustc-hash", "shlex", "syn", ] @@ -676,7 +676,7 @@ dependencies = [ "log", "notify", "notify-types", - "rustc-hash 2.1.2", + "rustc-hash", "walkdir", ] @@ -1474,7 +1474,7 @@ dependencies = [ "mlua_derive", "num-traits", "parking_lot", - "rustc-hash 2.1.2", + "rustc-hash", "rustversion", ] @@ -2065,12 +2065,6 @@ dependencies = [ "syn", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.2" @@ -3152,9 +3146,9 @@ dependencies = [ [[package]] name = "zlob" -version = "1.3.3" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f3522fa9701b74ec72758aedb96da278f6e0533bc16b6a79090bfb465d4661" +checksum = "466e82062db3527af78a7627a0e066f2420f8d2e573d530956fb9192956dc7b6" 
dependencies = [ "bindgen", "bitflags 2.11.0", diff --git a/Cargo.toml b/Cargo.toml index a8d9358d..4cad5f28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ heed = "0.22.0" ignore = "0.4.22" memmap2 = "0.9" mimalloc = "0.1.47" -zlob = "1.3.3" +zlob = "1.4.1" mlua = { version = "0.11.1", features = ["module", "luajit"] } neo_frizbee = { version = "0.10.2", features = ["match_end_col"] } diff --git a/Makefile b/Makefile index 295435df..a0b6f529 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ SHELL := bash # string rather than the literal `-o` / `pipefail` tokens. .SHELLFLAGS := -o pipefail -ec -.PHONY: build build-c-lib install uninstall test test-rust test-lua test-lua-snap test-version test-bun test-node prepare-bun prepare-node set-npm-version header test-stress test-stress-seeded test-stress-random test-stress-repos test-node-stress +.PHONY: build build-c-lib install uninstall test test-rust test-c-smoke test-c-api test-lua test-lua-snap test-version test-bun test-node prepare-bun prepare-node set-npm-version header test-stress test-stress-seeded test-stress-random test-stress-repos test-node-stress all: format test lint @@ -68,6 +68,23 @@ test-setup: test-rust: cargo test --workspace --features zlob --exclude fff-nvim +CC ?= cc +CFLAGS ?= -O0 -g -Wall -Wextra -std=c99 +TARGET_DIR ?= target/release +SMOKE_BIN := $(TARGET_DIR)/fff_c_smoke +SMOKE_SRC := crates/fff-c/tests/smoke.c +SMOKE_INCLUDE := crates/fff-c/include + +test-c-smoke: build-c-lib + $(CC) $(CFLAGS) -I $(SMOKE_INCLUDE) -L $(TARGET_DIR) \ + -Wl,-rpath,@loader_path/../target/release \ + -Wl,-rpath,$$(pwd)/$(TARGET_DIR) \ + $(SMOKE_SRC) -lfff_c -o $(SMOKE_BIN) + $(SMOKE_BIN) . + +# Alias kept for the `external-tests.yml` workflow naming. +test-c-api: test-c-smoke + # neovim instance swallows internal crashes and doesn't rise the the error exiting silently # so check the stdout in case the sigsegv coming out of fff was printed (actual regression). 
# Output is streamed live via `tee`; pipefail (set above) propagates nvim's exit. diff --git a/README.md b/README.md index fe5491d7..d19b5c0f 100644 --- a/README.md +++ b/README.md @@ -475,6 +475,9 @@ const hits = finder.value.grep("GetOffTheRecordProfile", { classifyDefinitions: true, }); +// Run extremely fast glob matching which is significantly (10-100 times) faster than Bun's and Node's implementations +const rustFiles = finder.value.glob("**/*.rs", { pageSize: 100 }); + finder.value.destroy(); ``` @@ -580,6 +583,35 @@ int main(void) { } ``` +### Versioned options struct (preferred) + +For instance creation use [`FffCreateOptions`](./crates/fff-c/include/fff.h) — a +versioned struct that evolves without ABI breaks. C99 designated +initializers keep call sites readable and zero-init unspecified fields: + +```c +FffResult *res = fff_create_instance_with(&(FffCreateOptions){ + .version = FFF_CREATE_OPTIONS_VERSION, + .base_path = "/path/to/repo", + .ai_mode = true, + .watch = true, + .enable_fs_root_scanning = false, // off by default + .enable_home_dir_scanning = false, // off by default +}); +``` + +### Glob-only search + +`fff_glob` filters indexed files by a single glob pattern, ranks by frecency, +paginates — bypasses the regular query parser entirely. Use this when you +already have a literal glob (`*.rs`, `**/*.test.ts`, `src/**`) and don't want +fuzzy matching layered on top. + +```c +FffResult *res = fff_glob(handle, "**/*.rs", "", 0, 0, 100); +// FffSearchResult in res->handle, free with fff_free_search_result. +``` + ### Notes - Every function returning `FffResult*` allocates with Rust's `Box`. 
Free with `fff_free_result`, do not use malloc's free diff --git a/crates/fff-c/cbindgen.toml b/crates/fff-c/cbindgen.toml index fdcb098e..86c67f9f 100644 --- a/crates/fff-c/cbindgen.toml +++ b/crates/fff-c/cbindgen.toml @@ -26,3 +26,8 @@ include = [ [fn] sort_by = "None" +# Translate `#[deprecated]` on extern "C" fns into a real C compiler +# attribute so callers get a warning when they use a removed/legacy entry. +# `{}` is substituted with the Rust deprecation note as a C string literal +# (already quoted) — do not wrap in extra quotes. +deprecated_with_note = "__attribute__((deprecated({})))" diff --git a/crates/fff-c/include/fff.h b/crates/fff-c/include/fff.h index 8a51e996..704ffbd5 100644 --- a/crates/fff-c/include/fff.h +++ b/crates/fff-c/include/fff.h @@ -9,10 +9,17 @@ #include #include +/** + * Current used version of [`FffCreateOptions`]. + */ +#define FFF_CREATE_OPTIONS_VERSION 1 + /** * Result envelope returned by all `fff_*` functions. * - * Heap-allocated — the caller must free it with `fff_free_result`. + * Heap-allocated. The caller must free it with `fff_free_result`. Calling `fff_free_result` + * **does not** deallocate the underlying `handle` pointer. It needs to be cleaned separately. + * see (`fff_destroy`, `fff_free_search_result`, `fff_free_grep_result`, `fff_free_string`, etc.). * * Depending on the function, the payload is delivered through different fields: * @@ -32,11 +39,6 @@ * | `fff_restart_index` | (none) | success flag only | * * On failure, `success` is false and `error` contains the message. - * - * **Important:** `fff_free_result` frees `error` but does **not** free `handle`. - * The caller must free the handle with the appropriate function - * (`fff_destroy`, `fff_free_search_result`, `fff_free_grep_result`, - * `fff_free_string`, etc.). */ typedef struct FffResult { /** @@ -48,7 +50,7 @@ typedef struct FffResult { */ char *error; /** - * Opaque pointer payload (instance handle, typed result struct, or string). May be null. 
+ * Opaque pointer payload. May be null. */ void *handle; /** @@ -57,6 +59,79 @@ typedef struct FffResult { int64_t int_value; } FffResult; +/** + * Options for `fff_create_instance_with`. + * + * Versioned struct: you populate the struct at your call level, we guarantee that + * the version is stable across the version changes, new fields only appended! + */ +typedef struct FffCreateOptions { + /** + * Set to [`FFF_CREATE_OPTIONS_VERSION`] when allocating. Used by the + * library to determine which trailing fields are populated. + */ + uint32_t version; + /** + * Directory to index (required, non-NULL). + */ + const char *base_path; + /** + * Frecency LMDB database path. NULL/empty to skip frecency tracking. + */ + const char *frecency_db_path; + /** + * Query history LMDB database path. NULL/empty to skip query tracking. + */ + const char *history_db_path; + /** + * Pre-populate mmap caches for top-frecency files after the initial scan. + */ + bool enable_mmap_cache; + /** + * Build content index after the initial scan for faster grep. + */ + bool enable_content_indexing; + /** + * Start a background file-system watcher for live updates. + */ + bool watch; + /** + * Enable AI-agent optimizations. + */ + bool ai_mode; + /** + * Tracing log file path. NULL/empty to skip log init. + */ + const char *log_file_path; + /** + * Log level: `"trace" | "debug" | "info" | "warn" | "error"`. + * NULL/empty defaults to `"info"`. Ignored when `log_file_path` is unset. + */ + const char *log_level; + /** + * Content cache file-count cap. 0 = auto. + */ + uint64_t cache_budget_max_files; + /** + * Content cache byte cap. 0 = auto. + */ + uint64_t cache_budget_max_bytes; + /** + * Per-file byte cap inside the content cache. 0 = auto. + */ + uint64_t cache_budget_max_file_size; + /** + * Allow indexing the filesystem root (`/`). Off by default — root is + * rarely the intended target and floods the watcher with churn. 
+ */ + bool enable_fs_root_scanning; + /** + * Allow indexing the user's home directory. Same trade-off as + * `enable_fs_root_scanning`. + */ + bool enable_home_dir_scanning; +} FffCreateOptions; + /** * A file item returned by `fff_search`. * @@ -346,18 +421,18 @@ typedef struct FffMixedSearchResult { } FffMixedSearchResult; /** - * Create a new file finder instance (legacy signature). + * Create a new file finder instance (legacy 8-arg positional signature). * - * @deprecated prefer `fff_create_instance2`, which also exposes log file and - * cache-budget configuration. This function delegates to `fff_create_instance2` - * with NULL log paths and auto cache budget, so behaviour is unchanged. - * - * The `use_unsafe_no_lock` parameter is deprecated and ignored; see - * [`fff_create_instance2`] for details. + * @deprecated Use [`fff_create_instance_with`] (or + * [`fff_create_instance_with_value`] for FFI bindings) — both take the + * versioned [`FffCreateOptions`] struct that evolves without ABI breaks. + * This function delegates to `fff_create_instance_with` internally; the + * `use_unsafe_no_lock` parameter is deprecated and ignored. * * ## Safety - * See `fff_create_instance2`. + * See `fff_create_instance_with`. */ +__attribute__((deprecated("Use fff_create_instance_with (by pointer) or fff_create_instance_with_value (by value) with FffCreateOptions instead. The struct evolves without ABI breaks."))) struct FffResult *fff_create_instance(const char *base_path, const char *frecency_db_path, const char *history_db_path, @@ -368,37 +443,17 @@ struct FffResult *fff_create_instance(const char *base_path, bool ai_mode); /** - * Create a new file finder instance (v2, with full options). - * - * Returns an opaque pointer that must be passed to all other `fff_*` calls - * and eventually freed with `fff_destroy`. 
- * - * # Parameters - * - * * `base_path` – directory to index (required) - * * `frecency_db_path` – frecency LMDB database path (NULL/empty to skip) - * * `history_db_path` – query history LMDB database path (NULL/empty to skip) - * * `use_unsafe_no_lock` – **deprecated, ignored.** Previously enabled - * * `enable_mmap_cache` – pre-populate mmap caches after the initial scan - * * `enable_content_indexing` – build content index after the initial scan - * * `watch` – start a background file-system watcher for live updates - * * `ai_mode` – enable AI-agent optimizations - * * `log_file_path` – tracing log file path (NULL/empty to skip). - * Only the first successful call in a process installs the subscriber; - * subsequent calls are no-ops at the log layer. - * * `log_level` – `"trace"`, `"debug"`, `"info"`, `"warn"`, `"error"` - * (NULL/empty defaults to `"info"`). Ignored when `log_file_path` is not set. - * * `cache_budget_max_files` – content cache file-count cap (0 = auto) - * * `cache_budget_max_bytes` – content cache byte cap (0 = auto) - * * `cache_budget_max_file_size` – per-file byte cap (0 = auto) + * Create a new file finder instance (legacy 13-arg positional signature). * - * When all three `cache_budget_*` values are 0 the budget is auto-computed - * from repo size after the initial scan. Otherwise an explicit budget is - * used: any field left at 0 falls back to its `unlimited()` default. + * @deprecated Use [`fff_create_instance_with`] (or + * [`fff_create_instance_with_value`] for FFI bindings) — both take the + * versioned [`FffCreateOptions`] struct that evolves without ABI breaks. + * The `use_unsafe_no_lock` parameter is deprecated and ignored. * * ## Safety - * String parameters must be valid null-terminated UTF-8 or NULL. + * See `fff_create_instance_with`. */ +__attribute__((deprecated("Use fff_create_instance_with (by pointer) or fff_create_instance_with_value (by value) with FffCreateOptions instead. 
The struct evolves without ABI breaks."))) struct FffResult *fff_create_instance2(const char *base_path, const char *frecency_db_path, const char *history_db_path, @@ -413,6 +468,50 @@ struct FffResult *fff_create_instance2(const char *base_path, uint64_t cache_budget_max_bytes, uint64_t cache_budget_max_file_size); +/** + * Create a new file finder instance from an [`FffCreateOptions`] struct. + * + * **Direct C consumers** populate the struct (designated initializers + * recommended), set `version` to [`FFF_CREATE_OPTIONS_VERSION`], and pass + * it by pointer. New fields are appended in future versions; old callers + * passing `version = 1` keep working forever. + * + * **FFI consumers** that prefer struct-by-value semantics (e.g. ffi-rs's + * `paramsType: [structDef]`) should use [`fff_create_instance_with_value`] + * instead — it's a thin calling-convention adapter that delegates here. + * + * Required: `opts.base_path` must be non-NULL and non-empty. + * + * When all three `cache_budget_*` values are 0 the budget is auto-computed + * from repo size after the initial scan. Otherwise an explicit budget is + * used: any field left at 0 falls back to its `unlimited()` default. + * + * ## Safety + * * `opts` must be a valid pointer to an `FffCreateOptions` whose `version` + * is in the range `1..=FFF_CREATE_OPTIONS_VERSION`. + * * All string pointers inside `opts` must be valid null-terminated UTF-8 + * or NULL. + */ +struct FffResult *fff_create_instance_with(const struct FffCreateOptions *opts); + +/** + * Calling-convention adapter for [`fff_create_instance_with`]. + * + * Same logic, but takes the [`FffCreateOptions`] struct **by value**. This + * makes the function callable from FFI libraries whose native struct + * support passes structs by value on the wire (e.g. Node's `ffi-rs` with + * `paramsType: [structDef]`). 
+ * + * This is **not** a versioned wrapper — when new fields are appended to + * `FffCreateOptions`, both this function and `fff_create_instance_with` + * pick them up automatically with no signature change. + * + * ## Safety + * All `*const c_char` fields inside `opts` must be valid null-terminated + * UTF-8 or NULL. The struct itself is consumed by value. + */ +struct FffResult *fff_create_instance_with_value(struct FffCreateOptions opts); + /** * Destroy a file finder instance and free all its resources. * @@ -448,6 +547,35 @@ struct FffResult *fff_search(void *fff_handle, int32_t combo_boost_multiplier, uint32_t min_combo_count); +/** + * Glob-only search: filter indexed files by a single glob pattern, rank by + * frecency, and paginate. Bypasses the regular query parser entirely. + * + * Use this when you already have a literal glob pattern (e.g. `*.rs`, a + * recursive `**` match, or `src/components` prefix) and want neither fuzzy + * matching nor multi-token constraint parsing. Ranking falls back to + * frecency because there is no fuzzy score to combine with. + * + * # Parameters + * + * * `fff_handle` - instance from `fff_create_instance` + * * `pattern` - glob pattern (required, no parsing - passed through verbatim) + * * `current_file` - path of the currently open file for deprioritization (NULL/empty to skip) + * * `max_threads` - maximum worker threads (0 = auto-detect) + * * `page_index` - pagination offset (0 = first page) + * * `page_size` - results per page (0 = default 100) + * + * ## Safety + * * `fff_handle` must be a valid instance pointer from `fff_create_instance`. + * * `pattern` and `current_file` must be valid null-terminated UTF-8 strings or NULL. + */ +struct FffResult *fff_glob(void *fff_handle, + const char *pattern, + const char *current_file, + uint32_t max_threads, + uint32_t page_index, + uint32_t page_size); + /** * Perform fuzzy search on indexed directories. 
* @@ -752,6 +880,12 @@ const void *fff_ptr_offset(const void *base, uintptr_t byte_offset); /** * Free a result returned by any `fff_*` function. + * **IMPORTANT:** this doesn't clean the internal handle, so it is safe to call right after + * you handle the error case. + * + * Note: Many non-libffi implementations are not supporting struct-by-value returns, so it's more + * convenient to have pointer returned at most of the time, though allocating result for every call + * is annoying, so we just rely on the fact that our allocator is good enough. * * ## Safety * `result_ptr` must be a valid pointer returned by a `fff_*` function. diff --git a/crates/fff-c/src/ffi_types.rs b/crates/fff-c/src/ffi_types.rs index 1d8c08b2..d2b0e972 100644 --- a/crates/fff-c/src/ffi_types.rs +++ b/crates/fff-c/src/ffi_types.rs @@ -14,6 +14,75 @@ use fff::{ MixedSearchResult, Score, SearchResult, }; +/// Current used version of [`FffCreateOptions`]. +pub const FFF_CREATE_OPTIONS_VERSION: u32 = 1; + +/// Options for `fff_create_instance_with`. +/// +/// Versioned struct: you populate the struct at your call level, we guarantee that +/// the version is stable across the version changes, new fields only appended! +#[repr(C)] +pub struct FffCreateOptions { + /// Set to [`FFF_CREATE_OPTIONS_VERSION`] when allocating. Used by the + /// library to determine which trailing fields are populated. + pub version: u32, + /// Directory to index (required, non-NULL). + pub base_path: *const c_char, + /// Frecency LMDB database path. NULL/empty to skip frecency tracking. + pub frecency_db_path: *const c_char, + /// Query history LMDB database path. NULL/empty to skip query tracking. + pub history_db_path: *const c_char, + /// Pre-populate mmap caches for top-frecency files after the initial scan. + pub enable_mmap_cache: bool, + /// Build content index after the initial scan for faster grep. + pub enable_content_indexing: bool, + /// Start a background file-system watcher for live updates. 
+ pub watch: bool, + /// Enable AI-agent optimizations. + pub ai_mode: bool, + /// Tracing log file path. NULL/empty to skip log init. + pub log_file_path: *const c_char, + /// Log level: `"trace" | "debug" | "info" | "warn" | "error"`. + /// NULL/empty defaults to `"info"`. Ignored when `log_file_path` is unset. + pub log_level: *const c_char, + /// Content cache file-count cap. 0 = auto. + pub cache_budget_max_files: u64, + /// Content cache byte cap. 0 = auto. + pub cache_budget_max_bytes: u64, + /// Per-file byte cap inside the content cache. 0 = auto. + pub cache_budget_max_file_size: u64, + /// Allow indexing the filesystem root (`/`). Off by default — root is + /// rarely the intended target and floods the watcher with churn. + pub enable_fs_root_scanning: bool, + /// Allow indexing the user's home directory. Same trade-off as + /// `enable_fs_root_scanning`. + pub enable_home_dir_scanning: bool, + // ----- new version 2+ fields go here, ALWAYS appended ----- +} + +impl FffCreateOptions { + /// Default values for a v1 options struct. + pub fn defaults() -> Self { + Self { + version: FFF_CREATE_OPTIONS_VERSION, + base_path: ptr::null(), + frecency_db_path: ptr::null(), + history_db_path: ptr::null(), + enable_mmap_cache: true, + enable_content_indexing: true, + watch: true, + ai_mode: false, + log_file_path: ptr::null(), + log_level: ptr::null(), + cache_budget_max_files: 0, + cache_budget_max_bytes: 0, + cache_budget_max_file_size: 0, + enable_fs_root_scanning: false, + enable_home_dir_scanning: false, + } + } +} + /// Allocate a heap CString from a `&str`, returning a raw pointer. fn cstring_new(s: &str) -> *mut c_char { CString::new(s).unwrap_or_default().into_raw() @@ -420,7 +489,9 @@ impl FffGrepResult { /// Result envelope returned by all `fff_*` functions. /// -/// Heap-allocated — the caller must free it with `fff_free_result`. +/// Heap-allocated. The caller must free it with `fff_free_result`. 
Calling `fff_free_result` +/// **does not** deallocate the underlying `handle` pointer. It needs to be cleaned separately. +/// see (`fff_destroy`, `fff_free_search_result`, `fff_free_grep_result`, `fff_free_string`, etc.). /// /// Depending on the function, the payload is delivered through different fields: /// @@ -440,18 +511,13 @@ impl FffGrepResult { /// | `fff_restart_index` | (none) | success flag only | /// /// On failure, `success` is false and `error` contains the message. -/// -/// **Important:** `fff_free_result` frees `error` but does **not** free `handle`. -/// The caller must free the handle with the appropriate function -/// (`fff_destroy`, `fff_free_search_result`, `fff_free_grep_result`, -/// `fff_free_string`, etc.). #[repr(C)] pub struct FffResult { /// Whether the operation succeeded. pub success: bool, /// Error message on failure. Null on success. pub error: *mut c_char, - /// Opaque pointer payload (instance handle, typed result struct, or string). May be null. + /// Opaque pointer payload. May be null. pub handle: *mut c_void, /// Integer payload for simple return values (bool as 0/1, counts, etc.). 
pub int_value: i64, @@ -725,3 +791,34 @@ impl From for FffScanProgress { } } } + +#[cfg(test)] +mod options_layout_tests { + use super::FffCreateOptions; + use std::mem::{align_of, offset_of, size_of}; + + // THIS TEST MUST NEVER BE UPDATED, ONLY EXTENDED WITH NEW FIELDS + // this is needed to ensure ABI backward compatibility + #[test] + #[cfg(target_pointer_width = "64")] + fn fff_create_options_layout_is_stable_64bit() { + assert_eq!(size_of::(), 88); + assert_eq!(align_of::(), 8); + + assert_eq!(offset_of!(FffCreateOptions, version), 0); + assert_eq!(offset_of!(FffCreateOptions, base_path), 8); + assert_eq!(offset_of!(FffCreateOptions, frecency_db_path), 16); + assert_eq!(offset_of!(FffCreateOptions, history_db_path), 24); + assert_eq!(offset_of!(FffCreateOptions, enable_mmap_cache), 32); + assert_eq!(offset_of!(FffCreateOptions, enable_content_indexing), 33); + assert_eq!(offset_of!(FffCreateOptions, watch), 34); + assert_eq!(offset_of!(FffCreateOptions, ai_mode), 35); + assert_eq!(offset_of!(FffCreateOptions, log_file_path), 40); + assert_eq!(offset_of!(FffCreateOptions, log_level), 48); + assert_eq!(offset_of!(FffCreateOptions, cache_budget_max_files), 56); + assert_eq!(offset_of!(FffCreateOptions, cache_budget_max_bytes), 64); + assert_eq!(offset_of!(FffCreateOptions, cache_budget_max_file_size), 72); + assert_eq!(offset_of!(FffCreateOptions, enable_fs_root_scanning), 80); + assert_eq!(offset_of!(FffCreateOptions, enable_home_dir_scanning), 81); + } +} diff --git a/crates/fff-c/src/lib.rs b/crates/fff-c/src/lib.rs index 83f18a76..be76416c 100644 --- a/crates/fff-c/src/lib.rs +++ b/crates/fff-c/src/lib.rs @@ -37,8 +37,9 @@ use fff::query_tracker::QueryTracker; use fff::{DbHealthChecker, FFFMode, FuzzySearchOptions, PaginationArgs, QueryParser}; use fff::{SharedFilePicker, SharedFrecency}; use ffi_types::{ - FffDirItem, FffDirSearchResult, FffFileItem, FffGrepMatch, FffGrepResult, FffMixedItem, - FffMixedSearchResult, FffResult, FffScanProgress, FffScore, 
FffSearchResult, + FFF_CREATE_OPTIONS_VERSION, FffCreateOptions, FffDirItem, FffDirSearchResult, FffFileItem, + FffGrepMatch, FffGrepResult, FffMixedItem, FffMixedSearchResult, FffResult, FffScanProgress, + FffScore, FffSearchResult, }; /// Opaque fff_handle holding all per-instance state. @@ -104,17 +105,20 @@ fn default_i32(val: i32, default: i32) -> i32 { if val == 0 { default } else { val } } -/// Create a new file finder instance (legacy signature). +/// Create a new file finder instance (legacy 8-arg positional signature). /// -/// @deprecated prefer `fff_create_instance2`, which also exposes log file and -/// cache-budget configuration. This function delegates to `fff_create_instance2` -/// with NULL log paths and auto cache budget, so behaviour is unchanged. -/// -/// The `use_unsafe_no_lock` parameter is deprecated and ignored; see -/// [`fff_create_instance2`] for details. +/// @deprecated Use [`fff_create_instance_with`] (or +/// [`fff_create_instance_with_value`] for FFI bindings) — both take the +/// versioned [`FffCreateOptions`] struct that evolves without ABI breaks. +/// This function delegates to `fff_create_instance_with` internally; the +/// `use_unsafe_no_lock` parameter is deprecated and ignored. /// /// ## Safety -/// See `fff_create_instance2`. +/// See `fff_create_instance_with`. +#[deprecated( + since = "0.8.5", + note = "Use fff_create_instance_with (by pointer) or fff_create_instance_with_value (by value) with FffCreateOptions instead. The struct evolves without ABI breaks." 
+)] #[unsafe(no_mangle)] pub unsafe extern "C" fn fff_create_instance( base_path: *const c_char, @@ -126,59 +130,30 @@ pub unsafe extern "C" fn fff_create_instance( watch: bool, ai_mode: bool, ) -> *mut FffResult { - unsafe { - fff_create_instance2( - base_path, - frecency_db_path, - history_db_path, - false, - enable_mmap_cache, - enable_content_indexing, - watch, - ai_mode, - std::ptr::null(), - std::ptr::null(), - 0, - 0, - 0, - ) - } + let mut opts = FffCreateOptions::defaults(); + opts.base_path = base_path; + opts.frecency_db_path = frecency_db_path; + opts.history_db_path = history_db_path; + opts.enable_mmap_cache = enable_mmap_cache; + opts.enable_content_indexing = enable_content_indexing; + opts.watch = watch; + opts.ai_mode = ai_mode; + unsafe { fff_create_instance_with(&opts as *const FffCreateOptions) } } -/// Create a new file finder instance (v2, with full options). -/// -/// Returns an opaque pointer that must be passed to all other `fff_*` calls -/// and eventually freed with `fff_destroy`. -/// -/// # Parameters -/// -/// * `base_path` – directory to index (required) -/// * `frecency_db_path` – frecency LMDB database path (NULL/empty to skip) -/// * `history_db_path` – query history LMDB database path (NULL/empty to skip) -/// * `use_unsafe_no_lock` – **deprecated, ignored.** Previously enabled -/// `MDB_NOLOCK|MDB_NOSYNC|MDB_NOMETASYNC` for LMDB; benchmarks showed no -/// measurable win under realistic contention, so the flag is now a no-op. -/// The parameter remains in the signature for ABI compatibility and will be -/// removed in a future release. -/// * `enable_mmap_cache` – pre-populate mmap caches after the initial scan -/// * `enable_content_indexing` – build content index after the initial scan -/// * `watch` – start a background file-system watcher for live updates -/// * `ai_mode` – enable AI-agent optimizations -/// * `log_file_path` – tracing log file path (NULL/empty to skip). 
-/// Only the first successful call in a process installs the subscriber; -/// subsequent calls are no-ops at the log layer. -/// * `log_level` – `"trace"`, `"debug"`, `"info"`, `"warn"`, `"error"` -/// (NULL/empty defaults to `"info"`). Ignored when `log_file_path` is not set. -/// * `cache_budget_max_files` – content cache file-count cap (0 = auto) -/// * `cache_budget_max_bytes` – content cache byte cap (0 = auto) -/// * `cache_budget_max_file_size` – per-file byte cap (0 = auto) +/// Create a new file finder instance (legacy 13-arg positional signature). /// -/// When all three `cache_budget_*` values are 0 the budget is auto-computed -/// from repo size after the initial scan. Otherwise an explicit budget is -/// used: any field left at 0 falls back to its `unlimited()` default. +/// @deprecated Use [`fff_create_instance_with`] (or +/// [`fff_create_instance_with_value`] for FFI bindings) — both take the +/// versioned [`FffCreateOptions`] struct that evolves without ABI breaks. +/// The `use_unsafe_no_lock` parameter is deprecated and ignored. /// /// ## Safety -/// String parameters must be valid null-terminated UTF-8 or NULL. +/// See `fff_create_instance_with`. +#[deprecated( + since = "0.8.5", + note = "Use fff_create_instance_with (by pointer) or fff_create_instance_with_value (by value) with FffCreateOptions instead. The struct evolves without ABI breaks." 
+)] #[unsafe(no_mangle)] pub unsafe extern "C" fn fff_create_instance2( base_path: *const c_char, @@ -195,27 +170,76 @@ pub unsafe extern "C" fn fff_create_instance2( cache_budget_max_bytes: u64, cache_budget_max_file_size: u64, ) -> *mut FffResult { - let base_path_str = match unsafe { cstr_to_str(base_path) } { + let mut opts = FffCreateOptions::defaults(); + opts.base_path = base_path; + opts.frecency_db_path = frecency_db_path; + opts.history_db_path = history_db_path; + opts.enable_mmap_cache = enable_mmap_cache; + opts.enable_content_indexing = enable_content_indexing; + opts.watch = watch; + opts.ai_mode = ai_mode; + opts.log_file_path = log_file_path; + opts.log_level = log_level; + opts.cache_budget_max_files = cache_budget_max_files; + opts.cache_budget_max_bytes = cache_budget_max_bytes; + opts.cache_budget_max_file_size = cache_budget_max_file_size; + unsafe { fff_create_instance_with(&opts as *const FffCreateOptions) } +} + +/// Create a new file finder instance from an [`FffCreateOptions`] struct. +/// +/// **Direct C consumers** populate the struct (designated initializers +/// recommended), set `version` to [`FFF_CREATE_OPTIONS_VERSION`], and pass +/// it by pointer. New fields are appended in future versions; old callers +/// passing `version = 1` keep working forever. +/// +/// **FFI consumers** that prefer struct-by-value semantics (e.g. ffi-rs's +/// `paramsType: [structDef]`) should use [`fff_create_instance_with_value`] +/// instead — it's a thin calling-convention adapter that delegates here. +/// +/// Required: `opts.base_path` must be non-NULL and non-empty. +/// +/// When all three `cache_budget_*` values are 0 the budget is auto-computed +/// from repo size after the initial scan. Otherwise an explicit budget is +/// used: any field left at 0 falls back to its `unlimited()` default. +/// +/// ## Safety +/// * `opts` must be a valid pointer to an `FffCreateOptions` whose `version` +/// is in the range `1..=FFF_CREATE_OPTIONS_VERSION`. 
+/// * All string pointers inside `opts` must be valid null-terminated UTF-8 +/// or NULL. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn fff_create_instance_with(opts: *const FffCreateOptions) -> *mut FffResult { + if opts.is_null() { + return FffResult::err("opts is null"); + } + let opts = unsafe { &*opts }; + if opts.version == 0 || opts.version > FFF_CREATE_OPTIONS_VERSION { + return FffResult::err(&format!( + "Unsupported FffCreateOptions version {} (library understands up to {})", + opts.version, FFF_CREATE_OPTIONS_VERSION + )); + } + + let base_path_str = match unsafe { cstr_to_str(opts.base_path) } { Some(s) if !s.is_empty() => s.to_string(), - _ => return FffResult::err("base_path is null or empty"), + _ => return FffResult::err("opts.base_path is null or empty"), }; - if let Some(log_path) = unsafe { optional_cstr(log_file_path) } { - let level = unsafe { optional_cstr(log_level) }; + if let Some(log_path) = unsafe { optional_cstr(opts.log_file_path) } { + let level = unsafe { optional_cstr(opts.log_level) }; if let Err(e) = fff::log::init_tracing(log_path, level) { return FffResult::err(&format!("Failed to init tracing: {}", e)); } } - let frecency_path = unsafe { optional_cstr(frecency_db_path) }.map(|s| s.to_string()); - let history_path = unsafe { optional_cstr(history_db_path) }.map(|s| s.to_string()); + let frecency_path = unsafe { optional_cstr(opts.frecency_db_path) }.map(|s| s.to_string()); + let history_path = unsafe { optional_cstr(opts.history_db_path) }.map(|s| s.to_string()); - // Create shared state that background threads will write into. 
let shared_picker = SharedFilePicker::default(); let shared_frecency = SharedFrecency::default(); let query_tracker = SharedQueryTracker::default(); - // Initialize frecency tracker if path is provided if let Some(ref frecency_path) = frecency_path { if let Some(parent) = PathBuf::from(frecency_path).parent() { let _ = std::fs::create_dir_all(parent); @@ -231,7 +255,6 @@ pub unsafe extern "C" fn fff_create_instance2( } } - // Initialize query tracker if path is provided if let Some(ref history_path) = history_path { if let Some(parent) = PathBuf::from(history_path).parent() { let _ = std::fs::create_dir_all(parent); @@ -247,30 +270,31 @@ pub unsafe extern "C" fn fff_create_instance2( } } - let mode = if ai_mode { + let mode = if opts.ai_mode { FFFMode::Ai } else { FFFMode::Neovim }; let cache_budget = fff::ContentCacheBudget::from_overrides( - cache_budget_max_files as usize, - cache_budget_max_bytes, - cache_budget_max_file_size, + opts.cache_budget_max_files as usize, + opts.cache_budget_max_bytes, + opts.cache_budget_max_file_size, ); - // Initialize file picker (writes directly into shared_picker) if let Err(e) = FilePicker::new_with_shared_state( shared_picker.clone(), shared_frecency.clone(), fff::FilePickerOptions { base_path: base_path_str, - enable_mmap_cache, - enable_content_indexing, - watch, + enable_mmap_cache: opts.enable_mmap_cache, + enable_content_indexing: opts.enable_content_indexing, + watch: opts.watch, mode, cache_budget, follow_symlinks: false, + enable_fs_root_scanning: opts.enable_fs_root_scanning, + enable_home_dir_scanning: opts.enable_home_dir_scanning, }, ) { return FffResult::err(&format!("Failed to init file picker: {}", e)); @@ -286,6 +310,25 @@ pub unsafe extern "C" fn fff_create_instance2( FffResult::ok_handle(fff_handle) } +/// Calling-convention adapter for [`fff_create_instance_with`]. +/// +/// Same logic, but takes the [`FffCreateOptions`] struct **by value**. 
This +/// makes the function callable from FFI libraries whose native struct +/// support passes structs by value on the wire (e.g. Node's `ffi-rs` with +/// `paramsType: [structDef]`). +/// +/// This is **not** a versioned wrapper — when new fields are appended to +/// `FffCreateOptions`, both this function and `fff_create_instance_with` +/// pick them up automatically with no signature change. +/// +/// ## Safety +/// All `*const c_char` fields inside `opts` must be valid null-terminated +/// UTF-8 or NULL. The struct itself is consumed by value. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn fff_create_instance_with_value(opts: FffCreateOptions) -> *mut FffResult { + unsafe { fff_create_instance_with(&opts as *const FffCreateOptions) } +} + /// Destroy a file finder instance and free all its resources. /// /// ## Safety @@ -396,6 +439,79 @@ pub unsafe extern "C" fn fff_search( FffResult::ok_handle(search_result as *mut c_void) } +/// Glob-only search: filter indexed files by a single glob pattern, rank by +/// frecency, and paginate. Bypasses the regular query parser entirely. +/// +/// Use this when you already have a literal glob pattern (e.g. `*.rs`, a +/// recursive `**` match, or `src/components` prefix) and want neither fuzzy +/// matching nor multi-token constraint parsing. Ranking falls back to +/// frecency because there is no fuzzy score to combine with. +/// +/// # Parameters +/// +/// * `fff_handle` - instance from `fff_create_instance` +/// * `pattern` - glob pattern (required, no parsing - passed through verbatim) +/// * `current_file` - path of the currently open file for deprioritization (NULL/empty to skip) +/// * `max_threads` - maximum worker threads (0 = auto-detect) +/// * `page_index` - pagination offset (0 = first page) +/// * `page_size` - results per page (0 = default 100) +/// +/// ## Safety +/// * `fff_handle` must be a valid instance pointer from `fff_create_instance`. 
+/// * `pattern` and `current_file` must be valid null-terminated UTF-8 strings or NULL. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn fff_glob( + fff_handle: *mut c_void, + pattern: *const c_char, + current_file: *const c_char, + max_threads: u32, + page_index: u32, + page_size: u32, +) -> *mut FffResult { + let inst = match unsafe { instance_ref(fff_handle) } { + Ok(i) => i, + Err(e) => return e, + }; + + let pattern_str = match unsafe { cstr_to_str(pattern) } { + Some(s) if !s.is_empty() => s, + _ => return FffResult::err("Pattern is null, empty, or invalid UTF-8"), + }; + + let current_file_str = unsafe { optional_cstr(current_file) }; + let page_size = default_u32(page_size, 100) as usize; + + let picker_guard = match inst.picker.read() { + Ok(g) => g, + Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)), + }; + + let picker = match picker_guard.as_ref() { + Some(p) => p, + None => { + return FffResult::err("File picker not initialized. Call fff_create_instance first."); + } + }; + + let results = picker.glob( + pattern_str, + FuzzySearchOptions { + max_threads: max_threads as usize, + current_file: current_file_str, + project_path: Some(picker.base_path()), + combo_boost_score_multiplier: 0, + min_combo_count: 0, + pagination: PaginationArgs { + offset: page_index as usize, + limit: page_size, + }, + }, + ); + + let search_result = FffSearchResult::from_core(&results, picker); + FffResult::ok_handle(search_result as *mut c_void) +} + /// Perform fuzzy search on indexed directories. 
/// /// # Parameters @@ -902,16 +1018,19 @@ pub unsafe extern "C" fn fff_restart_index( Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)), }; - let (warmup_caches, content_indexing, watch, mode) = if let Some(ref picker) = *guard { - ( - picker.has_mmap_cache(), - picker.has_content_indexing(), - picker.has_watcher(), - picker.mode(), - ) - } else { - (false, true, true, FFFMode::default()) - }; + let (warmup_caches, content_indexing, watch, mode, fs_root, home_dir) = + if let Some(ref picker) = *guard { + ( + picker.has_mmap_cache(), + picker.has_content_indexing(), + picker.has_watcher(), + picker.mode(), + picker.fs_root_scanning_enabled(), + picker.home_dir_scanning_enabled(), + ) + } else { + (false, true, true, FFFMode::default(), false, false) + }; drop(guard); @@ -926,6 +1045,8 @@ pub unsafe extern "C" fn fff_restart_index( mode, cache_budget: None, follow_symlinks: false, + enable_fs_root_scanning: fs_root, + enable_home_dir_scanning: home_dir, }, ) { Ok(()) => FffResult::ok_empty(), @@ -1393,6 +1514,12 @@ pub unsafe extern "C" fn fff_ptr_offset(base: *const c_void, byte_offset: usize) } /// Free a result returned by any `fff_*` function. +/// **IMPORTANT:** this doesn't clean up the internal handle, so it is safe to call right after +/// you handle the error case. +/// +/// Note: Many non-libffi implementations do not support struct-by-value returns, so it's more +/// convenient to return a pointer most of the time. Allocating a result for every call +/// is annoying, so we just rely on the fact that our allocator is good enough. /// /// ## Safety /// `result_ptr` must be a valid pointer returned by a `fff_*` function. diff --git a/crates/fff-c/tests/smoke.c b/crates/fff-c/tests/smoke.c new file mode 100644 index 00000000..b0219530 --- /dev/null +++ b/crates/fff-c/tests/smoke.c @@ -0,0 +1,89 @@ +/* + * Smoke test for libfff_c — the smallest possible end-to-end exercise of + * the public C API. 
We: + * + * 1. Create a picker with an `FffCreateOptions` populated via C99 + * designated initializers (the recommended idiom for direct C use). + * 2. Wait for the initial scan to complete. + * 3. Search for "smoke.c". + * 4. Fail unless this very file appears in the results. + * + * Build + run via `make test-c-smoke`. Override $(CC) to test other + * compilers. + */ + +#include <fff.h> +#include <stdio.h> +#include <string.h> + +int main(int argc, char **argv) { + const char *base_path = argc > 1 ? argv[1] : "."; + + // make sure that FFF C api is designed more for FFI rather than for direct C usage (I'm sorry) + struct FffResult *create_result = fff_create_instance_with(&(struct FffCreateOptions){ + .version = FFF_CREATE_OPTIONS_VERSION, + .base_path = base_path, + .enable_mmap_cache = false, + .enable_content_indexing = false, + .watch = false, + }); + + if (!create_result->success) { + fprintf(stderr, "fff couldn't create instance: %s\n", + create_result->error ? create_result->error : "?"); + fff_free_result(create_result); + return 1; + } + + void *file_picker = create_result->handle; + fff_free_result(create_result); // safe to drop now: handle outlives the envelope + + struct FffResult *scan_result = fff_wait_for_scan(file_picker, 5000); + if (!scan_result->success) { + fprintf(stderr, "wait_for_scan failed: %s\n", + scan_result->error ? scan_result->error : "?"); + fff_free_result(scan_result); + fff_destroy(file_picker); + return 1; + } + // int_value: 1 = scan completed in time, 0 = timed out. + if (scan_result->int_value == 0) { + fprintf(stderr, "wait_for_scan: timed out before initial scan finished\n"); + fff_free_result(scan_result); + fff_destroy(file_picker); + return 1; + } + fff_free_result(scan_result); + + struct FffResult *res = fff_search(file_picker, "smkoe.c", "", 0, 0, 50, 0, 0); + if (!res->success) { + fprintf(stderr, "search failed: %s\n", res->error ? 
res->error : "?"); + fff_free_result(res); + fff_destroy(file_picker); + return 1; + } + + struct FffSearchResult *sr = (struct FffSearchResult *)res->handle; + uint32_t total = sr->count; + int found = 0; + for (uint32_t i = 0; i < sr->count; i++) { + const char *path = sr->items[i].relative_path; + if (path && strstr(path, "smoke.c")) { + found = 1; + fprintf(stderr, "found self: %s\n", path); + break; + } + } + + fff_free_search_result(sr); + fff_free_result(res); + fff_destroy(file_picker); + + if (!found) { + fprintf(stderr, "FAIL: smoke.c not in search results (count=%u)\n", total); + return 1; + } + + fprintf(stderr, "PASS\n"); + return 0; +} diff --git a/crates/fff-core/Cargo.toml b/crates/fff-core/Cargo.toml index 9e6bb0d7..703c24d1 100644 --- a/crates/fff-core/Cargo.toml +++ b/crates/fff-core/Cargo.toml @@ -22,6 +22,11 @@ harness = false name = "memmem_bench" harness = false +[[bench]] +name = "glob_bench" +harness = false +required-features = ["zlob"] + [features] default = [] # Enable C FFI exports diff --git a/crates/fff-core/benches/glob_bench.rs b/crates/fff-core/benches/glob_bench.rs new file mode 100644 index 00000000..914fa0c0 --- /dev/null +++ b/crates/fff-core/benches/glob_bench.rs @@ -0,0 +1,386 @@ +//! Compare three glob-matching strategies for `match_glob_pattern` in constraints.rs: +//! +//! 1. Current: `zlob_match_paths` -> collect `as_ptr()` into AHashSet, filter paths +//! by pointer to recover indices. +//! 2. Free fn: `zlob_match_paths_indices` (added in zlob 1.4) — indices direct from C. +//! 3. Compiled: `ZlobPattern::compile` + `match_indices` — same indices path, but with +//! a precompiled pattern (reusable). For one-shot it should match (2); the win +//! appears if the pattern is reused (chunked / repeated calls). 
+use ahash::AHashSet; +use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main}; +use zlob::{ZlobFlags, ZlobPattern, zlob_match_paths, zlob_match_paths_indices}; + +fn make_paths(n: usize) -> Vec { + let exts = ["rs", "ts", "lua", "md", "toml", "go", "py", "c", "h", "txt"]; + let dirs = [ + "src/core", + "src/ui", + "crates/fff-core/src", + "lua/fff", + "tests/integration", + "vendor/lib", + "node_modules/foo/bar", + "docs/internal", + ]; + let mut out = Vec::with_capacity(n); + for i in 0..n { + let dir = dirs[i % dirs.len()]; + let ext = exts[i % exts.len()]; + out.push(format!("{dir}/file_{i}.{ext}")); + } + out +} + +fn current_impl(pattern: &str, paths: &[&str]) -> AHashSet { + let Ok(Some(matches)) = zlob_match_paths(pattern, paths, ZlobFlags::RECOMMENDED) else { + return AHashSet::new(); + }; + let matched_set: AHashSet = matches.iter().map(|s| s.as_ptr() as usize).collect(); + paths + .iter() + .enumerate() + .filter(|(_, p)| matched_set.contains(&(p.as_ptr() as usize))) + .map(|(i, _)| i) + .collect() +} + +fn indices_free_fn(pattern: &str, paths: &[&str]) -> AHashSet { + let Ok(hits) = zlob_match_paths_indices(pattern, paths, ZlobFlags::RECOMMENDED) else { + return AHashSet::new(); + }; + hits.to_iter().collect() +} + +fn compiled_pattern(pattern: &str, paths: &[&str]) -> AHashSet { + let Ok(p) = ZlobPattern::compile(pattern, ZlobFlags::RECOMMENDED) else { + return AHashSet::new(); + }; + let Ok(hits) = p.match_indices(paths, ZlobFlags::RECOMMENDED) else { + return AHashSet::new(); + }; + hits.to_iter().collect() +} + +fn bench_glob_strategies(c: &mut Criterion) { + let path_counts = [1_000usize, 10_000, 100_000]; + let patterns: &[(&str, &str)] = &[ + ("ext_rs", "**/*.rs"), + ("dir_glob", "src/**/*.{ts,lua}"), + ("literal_seg", "**/node_modules/**"), + ("brace_multi", "**/*.{rs,ts,lua,md}"), + ]; + + for &count in &path_counts { + let owned = make_paths(count); + let paths: Vec<&str> = owned.iter().map(|s| 
s.as_str()).collect(); + + let mut group = c.benchmark_group(format!("glob_{count}")); + group.sample_size(50); + + for &(name, pat) in patterns { + let id_curr = BenchmarkId::new("current_ptr_trick", name); + group.bench_with_input(id_curr, &pat, |b, &pat| { + b.iter(|| { + let r = current_impl(black_box(pat), black_box(&paths)); + black_box(r); + }); + }); + + let id_idx = BenchmarkId::new("match_indices_fn", name); + group.bench_with_input(id_idx, &pat, |b, &pat| { + b.iter(|| { + let r = indices_free_fn(black_box(pat), black_box(&paths)); + black_box(r); + }); + }); + + let id_comp = BenchmarkId::new("compiled_pattern", name); + group.bench_with_input(id_comp, &pat, |b, &pat| { + b.iter(|| { + let r = compiled_pattern(black_box(pat), black_box(&paths)); + black_box(r); + }); + }); + } + + group.finish(); + } +} + +/// Hot-loop: pattern compiled ONCE, matched many times against fresh path slices. +/// Models a hypothetical change where we cache compiled patterns across calls. +fn bench_compiled_reuse(c: &mut Criterion) { + let owned = make_paths(10_000); + let paths: Vec<&str> = owned.iter().map(|s| s.as_str()).collect(); + let pat = "**/*.{rs,ts,lua,md}"; + + let mut group = c.benchmark_group("glob_reuse_10k"); + group.sample_size(100); + + group.bench_function("recompile_each_time", |b| { + b.iter(|| { + let p = ZlobPattern::compile(black_box(pat), ZlobFlags::RECOMMENDED).unwrap(); + let hits = p + .match_indices(black_box(&paths), ZlobFlags::RECOMMENDED) + .unwrap(); + black_box(hits.len()); + }); + }); + + let compiled = ZlobPattern::compile(pat, ZlobFlags::RECOMMENDED).unwrap(); + group.bench_function("reuse_compiled", |b| { + b.iter(|| { + let hits = compiled + .match_indices(black_box(&paths), ZlobFlags::RECOMMENDED) + .unwrap(); + black_box(hits.len()); + }); + }); + + group.finish(); +} + +/// End-to-end: build the lookup AND iterate items checking membership, modeling the +/// real call shape in `apply_constraints` (filter loop reads the result for 
every item). +fn bench_full_pipeline(c: &mut Criterion) { + bench_full_pipeline_size(c, 100_000); + bench_full_pipeline_size(c, 500_000); +} + +fn bench_full_pipeline_size(c: &mut Criterion, count: usize) { + let owned = make_paths(count); + let paths: Vec<&str> = owned.iter().map(|s| s.as_str()).collect(); + let pat = "**/*.{rs,ts,lua,md}"; + + let mut group = c.benchmark_group(format!("glob_full_pipeline_{count}")); + group.sample_size(50); + + // (A) current: indices -> AHashSet -> per-item set.contains + group.bench_function("indices_to_ahashset_then_filter", |b| { + b.iter(|| { + let hits = + zlob_match_paths_indices(black_box(pat), &paths, ZlobFlags::RECOMMENDED).unwrap(); + let set: AHashSet = hits.to_iter().collect(); + let count = (0..paths.len()).filter(|i| set.contains(i)).count(); + black_box(count); + }); + }); + + // (B) indices -> Vec bitmap -> per-item array lookup + group.bench_function("indices_to_bitmap_then_filter", |b| { + b.iter(|| { + let hits = + zlob_match_paths_indices(black_box(pat), &paths, ZlobFlags::RECOMMENDED).unwrap(); + let mut mask = vec![false; paths.len()]; + for i in hits.to_iter() { + mask[i] = true; + } + let count = (0..paths.len()).filter(|&i| mask[i]).count(); + black_box(count); + }); + }); + + // (C) compiled pattern + per-item matches() inside the filter loop. No batch. + group.bench_function("compiled_per_item_matches", |b| { + b.iter(|| { + let p = ZlobPattern::compile(black_box(pat), ZlobFlags::RECOMMENDED).unwrap(); + let count = paths.iter().filter(|path| p.matches_default(path)).count(); + black_box(count); + }); + }); + + // (D) compiled pattern + chunked batch -> Vec bitmap. Best of both: + // SIMD batch wins inside chunks, no global allocation pressure, O(1) lookup. 
+ group.bench_function("compiled_chunked_to_bitmap", |b| { + b.iter(|| { + let p = ZlobPattern::compile(black_box(pat), ZlobFlags::RECOMMENDED).unwrap(); + let mut mask = vec![false; paths.len()]; + for (chunk_idx, chunk) in paths.chunks(512).enumerate() { + let base = chunk_idx * 512; + let hits = p.match_indices(chunk, ZlobFlags::RECOMMENDED).unwrap(); + for i in hits.to_iter() { + mask[base + i] = true; + } + } + let count = (0..paths.len()).filter(|&i| mask[i]).count(); + black_box(count); + }); + }); + + // (E') indices -> bit-packed Vec -> per-item bit test + group.bench_function("indices_to_bitset_then_filter", |b| { + b.iter(|| { + let hits = + zlob_match_paths_indices(black_box(pat), &paths, ZlobFlags::RECOMMENDED).unwrap(); + let words = paths.len().div_ceil(64); + let mut bits = vec![0u64; words]; + for i in hits.to_iter() { + bits[i >> 6] |= 1u64 << (i & 63); + } + let count = (0..paths.len()) + .filter(|&i| (bits[i >> 6] >> (i & 63)) & 1 == 1) + .count(); + black_box(count); + }); + }); + + // (E) (D) but larger chunk + group.bench_function("compiled_chunked_4096_to_bitmap", |b| { + b.iter(|| { + let p = ZlobPattern::compile(black_box(pat), ZlobFlags::RECOMMENDED).unwrap(); + let mut mask = vec![false; paths.len()]; + for (chunk_idx, chunk) in paths.chunks(4096).enumerate() { + let base = chunk_idx * 4096; + let hits = p.match_indices(chunk, ZlobFlags::RECOMMENDED).unwrap(); + for i in hits.to_iter() { + mask[base + i] = true; + } + } + let count = (0..paths.len()).filter(|&i| mask[i]).count(); + black_box(count); + }); + }); + + group.finish(); +} + +/// Mixed-constraint pipeline: glob + ext. Compare pre-pass batch (current) vs +/// inline `ZlobPattern::matches` after the cheap ext check rejects items. +/// +/// Variables: ext rejection rate. Extreme cases reveal where each strategy wins. 
+fn bench_mixed_pipeline(c: &mut Criterion) { + let count = 100_000; + let owned = make_paths(count); + let paths: Vec<&str> = owned.iter().map(|s| s.as_str()).collect(); + let glob_pat = "**/*.{rs,ts,lua,md}"; + + // 4 ext sets: from very selective (1/10 paths kept) to permissive (kept all). + let scenarios: &[(&str, &[&str])] = &[ + ("ext_1of10", &["rs"]), + ("ext_4of10", &["rs", "ts", "lua", "md"]), + ( + "ext_8of10", + &["rs", "ts", "lua", "md", "toml", "go", "py", "c"], + ), + ( + "ext_all", + &["rs", "ts", "lua", "md", "toml", "go", "py", "c", "h", "txt"], + ), + ]; + + fn ext_match(name: &str, exts: &[&str]) -> bool { + exts.iter().any(|e| { + let bytes = name.as_bytes(); + let elen = e.len(); + bytes.len() > elen + 1 + && bytes[bytes.len() - elen - 1] == b'.' + && bytes[bytes.len() - elen..].eq_ignore_ascii_case(e.as_bytes()) + }) + } + + let mut group = c.benchmark_group("glob_mixed_100k"); + group.sample_size(50); + + for &(name, exts) in scenarios { + // (A) PRE-PASS: build bitmap for ALL paths, then per-item ext-then-bitmap. + let id_pre = BenchmarkId::new("prepass_bitmap", name); + group.bench_with_input(id_pre, &exts, |b, &exts| { + b.iter(|| { + let hits = + zlob_match_paths_indices(black_box(glob_pat), &paths, ZlobFlags::RECOMMENDED) + .unwrap(); + let mut mask = vec![false; paths.len()]; + for i in hits.to_iter() { + mask[i] = true; + } + let count = paths + .iter() + .enumerate() + .filter(|&(_, p)| ext_match(p, exts)) + .filter(|&(i, _)| mask[i]) + .count(); + black_box(count); + }); + }); + + // (B) INLINE: compile once, per-item ext check first, then matches() only on survivors. 
+ let id_inline = BenchmarkId::new("inline_compiled", name); + group.bench_with_input(id_inline, &exts, |b, &exts| { + b.iter(|| { + let p = ZlobPattern::compile(black_box(glob_pat), ZlobFlags::RECOMMENDED).unwrap(); + let count = paths + .iter() + .filter(|path| ext_match(path, exts)) + .filter(|path| p.matches_default(path)) + .count(); + black_box(count); + }); + }); + } + + group.finish(); +} + +/// Compare hand-rolled `file_has_extension` byte compare vs compiling extensions +/// into a single brace glob `**/*.{rs,ts,lua,md}` and dispatching through zlob. +/// Both share the same per-item "filter then count" shape. +fn bench_extensions_vs_glob(c: &mut Criterion) { + let owned = make_paths(100_000); + let paths: Vec<&str> = owned.iter().map(|s| s.as_str()).collect(); + let exts = ["rs", "ts", "lua", "md"]; + let glob_pat = "**/*.{rs,ts,lua,md}"; + + fn ext_match(name: &str, exts: &[&str]) -> bool { + let bytes = name.as_bytes(); + exts.iter().any(|e| { + let elen = e.len(); + bytes.len() > elen + 1 + && bytes[bytes.len() - elen - 1] == b'.' 
+ && bytes[bytes.len() - elen..].eq_ignore_ascii_case(e.as_bytes()) + }) + } + + let mut group = c.benchmark_group("ext_vs_glob_100k"); + group.sample_size(50); + + group.bench_function("file_has_extension_loop", |b| { + b.iter(|| { + let count = paths.iter().filter(|p| ext_match(p, &exts)).count(); + black_box(count); + }); + }); + + group.bench_function("compiled_brace_glob_inline", |b| { + b.iter(|| { + let p = ZlobPattern::compile(black_box(glob_pat), ZlobFlags::RECOMMENDED).unwrap(); + let count = paths.iter().filter(|path| p.matches_default(path)).count(); + black_box(count); + }); + }); + + group.bench_function("brace_glob_prepass_bitmap", |b| { + b.iter(|| { + let hits = + zlob_match_paths_indices(black_box(glob_pat), &paths, ZlobFlags::RECOMMENDED) + .unwrap(); + let mut mask = vec![false; paths.len()]; + for i in hits.to_iter() { + mask[i] = true; + } + let count = (0..paths.len()).filter(|&i| mask[i]).count(); + black_box(count); + }); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_glob_strategies, + bench_compiled_reuse, + bench_full_pipeline, + bench_mixed_pipeline, + bench_extensions_vs_glob +); +criterion_main!(benches); diff --git a/crates/fff-core/src/background_watcher.rs b/crates/fff-core/src/background_watcher.rs index 9cabffaf..26c03eaa 100644 --- a/crates/fff-core/src/background_watcher.rs +++ b/crates/fff-core/src/background_watcher.rs @@ -41,6 +41,8 @@ impl BackgroundWatcher { shared_picker: SharedFilePicker, shared_frecency: SharedFrecency, mode: FFFMode, + enable_fs_root_scanning: bool, + enable_home_dir_scanning: bool, ) -> Result { info!( "Initializing background watcher for path: {}, mode: {:?}", @@ -48,34 +50,31 @@ impl BackgroundWatcher { mode, ); - // Refuse to watch the filesystem root or the user's home directory. 
- // These are prone to high-volume event churn (editor temp files, - // browser caches, log rotations) which inflates the overflow arena - // and, on macOS, can exhaust the per-process FSEvents stream limit. - if base_path.parent().is_none() - || Some(base_path.as_os_str()) == dirs::home_dir().as_ref().map(|p| p.as_os_str()) - { + // by default we do not want to allow users to search their FS root, this is very error prone + // though some consumers would specifically allow that e.g. unikernels, windows disc + // partition or sub file systems. By default - fail, unless user permits + let is_fs_root = base_path.parent().is_none(); + // use rust's path api for maximum reliability of the comparison + let is_home_dir = Some(&base_path) == dirs::home_dir().as_ref(); + + if (is_fs_root && !enable_fs_root_scanning) || (is_home_dir && !enable_home_dir_scanning) { return Err(Error::FilesystemRoot(base_path)); } // macOS: always use a single recursive FSEvent stream. - // // Per-dir NonRecursive watches create one FSEvent stream per dir. // The per-process FSEvent cap is lower than expected in practice // (4096 per process, but FFF usually is running within code editors), // and each failed `watch()` after the cap blocks ~40 ms on kernel retry. // Yes we pay for filtering events on handler phase but it is usable // - // macOS and Windows use a single recursive watch. FSEvents and - // ReadDirectoryChangesW both support true kernel-level recursion - // on one handle — per-dir NonRecursive watches burn streams/handles - // for no benefit and, on Windows, have been observed to silently - // drop Modify events for nested paths. + // Windows doesn't seem to have a hard cap, but in practice non recursive watching + // does a way worse job and often loses events which is not an option for us. 
// // Linux keeps the per-dir NonRecursive strategy: inotify has no - // kernel-level recursion, so Recursive here would still register - // one watch per subdir but without the ignored-dir filtering we - // get by iterating `picker.for_each_dir` ourselves. + // kernel-level watcher recursion, so we have to manually watch every single interested + // directory for watch events which is in practice stable and fast if system has enough + // spare watcher (configurable by the user, usually 100k - 1m) let use_recursive = cfg!(any(target_os = "macos", target_os = "windows")); let (watch_tx, watch_rx) = mpsc::channel::(); diff --git a/crates/fff-core/src/constraints.rs b/crates/fff-core/src/constraints.rs index dc60d8fb..d93ac70b 100644 --- a/crates/fff-core/src/constraints.rs +++ b/crates/fff-core/src/constraints.rs @@ -1,6 +1,5 @@ //! Constraint-based prefiltering for search queries. -use ahash::AHashSet; use fff_query_parser::{Constraint, GitStatusFilter}; use smallvec::SmallVec; @@ -152,312 +151,436 @@ pub fn path_contains_segment(path: &str, segment: &str) -> bool { false } +/// Returns `None` if no constraints are present, `Some(filtered)` otherwise. +/// +/// Constraint semantics: +/// - All `Extension` constraints OR together (file matches if ANY extension hits). +/// They're split out up front so the per-item loop reads the OR predicate as a +/// single short-circuit check, not as N AND-merged sub-constraints. +/// - Every other constraint kind ANDs (file matches only if ALL hold). They're +/// evaluated in order with short-circuit on first failure. 
+pub(crate) fn apply_constraints<'a, T: Constrainable + Sync>( + items: &'a [T], + constraints: &[Constraint<'_>], + arena: ArenaPtr, +) -> Option> { + if constraints.is_empty() { + return None; + } + let plan = ConstraintPlan::build(constraints, items, arena); + Some(plan.run(items, arena)) +} + +#[cfg(feature = "zlob")] +type GlobPattern = zlob::ZlobPattern; +#[cfg(not(feature = "zlob"))] +type GlobPattern = globset::GlobMatcher; + +/// How `Constraint::Glob` is evaluated for each item. +enum GlobStrategy { + /// No Glob constraint present. + None, + /// Pure-glob workload (no Extension filter to reject items first). + /// Batch all paths through zlob/globset once; per-item check is a Vec lookup. + Prepass(Vec>), + /// Mixed workload (Extension filter present). Compile patterns up front, then + /// only run them on items that survive the cheap Extension OR check. + /// `None` slot = compile failure -> never matches; preserves index alignment. + Inline(Vec>), +} + +/// Bundles preprocessed constraints for the per-item evaluator. +pub(crate) struct ConstraintPlan<'q, 'c> { + /// OR semantics — file passes if ANY extension matches. Empty = no ext filter. + extensions: SmallVec<[&'q str; 8]>, + /// AND semantics — file passes only if ALL match. 
+ rest: SmallVec<[&'c Constraint<'q>; 8]>, + glob: GlobStrategy, +} + +pub(crate) struct ConstraintsBuffers { + fname: String, + path: String, +} + +impl ConstraintsBuffers { + pub(crate) fn new() -> Self { + Self { + fname: String::with_capacity(64), + path: String::with_capacity(64), + } + } +} + +impl<'q, 'c> ConstraintPlan<'q, 'c> { + pub(crate) fn build( + constraints: &'c [Constraint<'q>], + items: &[T], + arena: ArenaPtr, + ) -> Self { + let mut extensions = SmallVec::new(); + let mut rest = SmallVec::new(); + for c in constraints { + match c { + Constraint::Extension(ext) => extensions.push(*ext), + _ => rest.push(c), + } + } + let has_pre_filter = !extensions.is_empty() || rest.iter().any(|&c| !is_glob_node(c)); + let glob = build_glob_strategy(&rest, has_pre_filter, items, arena); + Self { + extensions, + rest, + glob, + } + } + + fn run<'a, T: Constrainable + Sync>(&self, items: &'a [T], arena: ArenaPtr) -> Vec<&'a T> { + if items.len() >= PAR_THRESHOLD { + use rayon::prelude::*; + items + .par_iter() + .enumerate() + .map_init(ConstraintsBuffers::new, |scratch, (i, item)| { + self.matches(item, i, arena, scratch).then_some(item) + }) + .flatten() + .collect() + } else { + let mut scratch = ConstraintsBuffers::new(); + items + .iter() + .enumerate() + .filter_map(|(i, item)| self.matches(item, i, arena, &mut scratch).then_some(item)) + .collect() + } + } + + #[inline] + pub(crate) fn matches( + &self, + item: &T, + index: usize, + arena: ArenaPtr, + scratch: &mut ConstraintsBuffers, + ) -> bool { + if !self.passes_extensions(item, arena, scratch) { + return false; + } + + let mut glob_idx = 0; + self.rest.iter().all(|c| { + let glob: &GlobStrategy = &self.glob; + let glob_idx: &mut usize = &mut glob_idx; + let negate = false; + let raw = match c { + Constraint::Glob(_) => { + let m = match glob { + GlobStrategy::None => true, + GlobStrategy::Prepass(masks) => masks + .get(*glob_idx) + .and_then(|mask| mask.get(index).copied()) + .unwrap_or(false), + 
GlobStrategy::Inline(patterns) => { + item.write_relative_path(arena, &mut scratch.path); + patterns + .get(*glob_idx) + .and_then(|p| p.as_ref()) + .map(|p| compiled_matches(p, &scratch.path)) + .unwrap_or(false) + } + }; + *glob_idx += 1; + m + } + // Reachable only via `Not(Extension(_))` — bare extensions are split out + // up front and handled in `passes_extensions`. + Constraint::Extension(ext) => { + item.write_file_name(arena, &mut scratch.fname); + file_has_extension(&scratch.fname, ext) + } + Constraint::PathSegment(segment) => { + item.write_relative_path(arena, &mut scratch.path); + path_contains_segment(&scratch.path, segment) + } + Constraint::FilePath(suffix) => { + item.write_relative_path(arena, &mut scratch.path); + path_ends_with_suffix(&scratch.path, suffix) + } + Constraint::Text(text) => { + // Only meaningful under negation (used as exclude filter). + item.write_relative_path(arena, &mut scratch.path); + contains_ascii_ci(&scratch.path, text) + } + Constraint::GitStatus(filter) => matches_git_status(item.git_status(), filter), + Constraint::Not(inner) => { + return evaluate(item, index, inner, glob, glob_idx, !negate, arena, scratch); + } + // Pass-throughs — handled at higher levels. 
+ Constraint::Parts(_) | Constraint::Exclude(_) | Constraint::FileType(_) => true, + }; + if negate { !raw } else { raw } + }) + } + + #[inline] + fn passes_extensions( + &self, + item: &T, + arena: ArenaPtr, + scratch: &mut ConstraintsBuffers, + ) -> bool { + if self.extensions.is_empty() { + return true; + } + item.write_file_name(arena, &mut scratch.fname); + self.extensions + .iter() + .any(|ext| file_has_extension(&scratch.fname, ext)) + } +} + #[inline] #[allow(clippy::too_many_arguments)] -fn item_matches_constraint_at_index( +fn evaluate( item: &T, - item_index: usize, + index: usize, constraint: &Constraint<'_>, - glob_results: &[(bool, AHashSet)], + glob: &GlobStrategy, glob_idx: &mut usize, negate: bool, arena: ArenaPtr, - fname_buf: &mut String, - path_buf: &mut String, + scratch: &mut ConstraintsBuffers, ) -> bool { - let matches = match constraint { - Constraint::Extension(ext) => { - item.write_file_name(arena, fname_buf); - file_has_extension(fname_buf, ext) - } + let raw = match constraint { Constraint::Glob(_) => { - let result = glob_results - .get(*glob_idx) - .map(|(is_neg, set)| { - let matched = set.contains(&item_index); - - if *is_neg { !matched } else { matched } - }) - .unwrap_or(true); + let m = match glob { + GlobStrategy::None => true, + GlobStrategy::Prepass(masks) => masks + .get(*glob_idx) + .and_then(|mask| mask.get(index).copied()) + .unwrap_or(false), + GlobStrategy::Inline(patterns) => { + item.write_relative_path(arena, &mut scratch.path); + patterns + .get(*glob_idx) + .and_then(|p| p.as_ref()) + .map(|p| compiled_matches(p, &scratch.path)) + .unwrap_or(false) + } + }; *glob_idx += 1; - return if negate { !result } else { result }; + m + } + // Reachable only via `Not(Extension(_))` — bare extensions are split out + // up front and handled in `passes_extensions`. 
+ Constraint::Extension(ext) => { + item.write_file_name(arena, &mut scratch.fname); + file_has_extension(&scratch.fname, ext) } Constraint::PathSegment(segment) => { - item.write_relative_path(arena, path_buf); - path_contains_segment(path_buf, segment) + item.write_relative_path(arena, &mut scratch.path); + path_contains_segment(&scratch.path, segment) } Constraint::FilePath(suffix) => { - item.write_relative_path(arena, path_buf); - path_ends_with_suffix(path_buf, suffix) - } - Constraint::GitStatus(status_filter) => match (item.git_status(), status_filter) { - (Some(status), GitStatusFilter::Modified) => is_modified_status(status), - (Some(status), GitStatusFilter::Untracked) => status.contains(git2::Status::WT_NEW), - (Some(status), GitStatusFilter::Staged) => status.intersects( - git2::Status::INDEX_NEW - | git2::Status::INDEX_MODIFIED - | git2::Status::INDEX_DELETED - | git2::Status::INDEX_RENAMED - | git2::Status::INDEX_TYPECHANGE, - ), - (Some(status), GitStatusFilter::Unmodified) => status.is_empty(), - (None, GitStatusFilter::Unmodified) => true, - (None, _) => false, - }, - Constraint::Not(inner) => { - return item_matches_constraint_at_index( - item, - item_index, - inner, - glob_results, - glob_idx, - !negate, - arena, - fname_buf, - path_buf, - ); + item.write_relative_path(arena, &mut scratch.path); + path_ends_with_suffix(&scratch.path, suffix) } - - // only works with negation Constraint::Text(text) => { - item.write_relative_path(arena, path_buf); - contains_ascii_ci(path_buf, text) + // Only meaningful under negation (used as exclude filter). + item.write_relative_path(arena, &mut scratch.path); + contains_ascii_ci(&scratch.path, text) } - - // Parts and Exclude are handled at a higher level + Constraint::GitStatus(filter) => matches_git_status(item.git_status(), filter), + Constraint::Not(inner) => { + return evaluate(item, index, inner, glob, glob_idx, !negate, arena, scratch); + } + // Pass-throughs — handled at higher levels. 
Constraint::Parts(_) | Constraint::Exclude(_) | Constraint::FileType(_) => true, }; + if negate { !raw } else { raw } +} - if negate { !matches } else { matches } +#[inline] +fn matches_git_status(status: Option, filter: &GitStatusFilter) -> bool { + match (status, filter) { + (Some(s), GitStatusFilter::Modified) => is_modified_status(s), + (Some(s), GitStatusFilter::Untracked) => s.contains(git2::Status::WT_NEW), + (Some(s), GitStatusFilter::Staged) => s.intersects( + git2::Status::INDEX_NEW + | git2::Status::INDEX_MODIFIED + | git2::Status::INDEX_DELETED + | git2::Status::INDEX_RENAMED + | git2::Status::INDEX_TYPECHANGE, + ), + (Some(s), GitStatusFilter::Unmodified) => s.is_empty(), + (None, GitStatusFilter::Unmodified) => true, + (None, _) => false, + } } -/// Returns `None` if no constraints are present, `Some(filtered)` otherwise. -/// Extension constraints use OR logic; all others use AND. -pub(crate) fn apply_constraints<'a, T: Constrainable + Sync>( - items: &'a [T], - constraints: &[Constraint<'_>], +#[inline] +#[cfg(feature = "zlob")] +fn compiled_matches(p: &GlobPattern, path: &str) -> bool { + p.matches_default(path) +} + +#[inline] +#[cfg(not(feature = "zlob"))] +fn compiled_matches(p: &GlobPattern, path: &str) -> bool { + p.is_match(path) +} + +/// Decide between batch prepass and inline compiled patterns. +/// +/// `has_pre_filter` = true when something cheaper than glob can reject items first +/// (extensions OR non-glob constraints in `rest`). In that case inline pays glob +/// cost only on survivors and beats prepass on every workload we benched. Pure-glob +/// (no pre-filter) takes prepass — single batched zlob call beats N inline matches. 
+fn build_glob_strategy( + rest: &[&Constraint<'_>], + has_pre_filter: bool, + items: &[T], arena: ArenaPtr, -) -> Option> { - if constraints.is_empty() { - return None; +) -> GlobStrategy { + if !contains_glob(rest) { + return GlobStrategy::None; } + if has_pre_filter { + return GlobStrategy::Inline(compile_globs(rest)); + } + let buf = PathBuffer::collect(items, arena); + let path_refs = buf.as_strs(); + GlobStrategy::Prepass(precompute_masks(rest, &path_refs)) +} - // Separate extension constraints from other constraints — they use OR logic - let mut extensions: SmallVec<[&str; 8]> = SmallVec::new(); - let mut other_constraints: SmallVec<[&Constraint<'_>; 8]> = SmallVec::new(); - - for constraint in constraints { - match constraint { - Constraint::Extension(ext) => extensions.push(ext), - _ => other_constraints.push(constraint), - } +/// `Glob` or `Not(Glob)` — the constraint kinds whose evaluation goes through +/// the GlobStrategy. Everything else can pre-reject items before glob runs. +fn is_glob_node(c: &Constraint<'_>) -> bool { + match c { + Constraint::Glob(_) => true, + Constraint::Not(inner) => is_glob_node(inner), + _ => false, } +} - // Only collect paths if we have glob constraints (expensive) - let has_globs = other_constraints - .iter() - .any(|c| matches!(c, Constraint::Glob(_) | Constraint::Not(_))); +fn contains_glob(rest: &[&Constraint<'_>]) -> bool { + rest.iter().any(|c| is_glob_node(c)) +} + +/// Contiguous byte buffer holding every item's `relative_path`. Single allocation +/// instead of N `String`s. On Windows the in-place pass folds `\\` -> `/` so the +/// glob library sees a canonical separator. +struct PathBuffer { + bytes: Vec, + offsets: Vec<(usize, usize)>, +} - let glob_results = if has_globs { - // Build a single contiguous buffer of all relative paths + offset table. - // One allocation for the buffer, one for offsets — NOT one String per file. 
- // On Windows we fold `\\` into `/` while copying so globset/zlob see a - // canonical separator. The rewrite is in place on bytes we just wrote. - let mut path_buf = Vec::::new(); - let mut offsets = Vec::<(usize, usize)>::with_capacity(items.len()); +impl PathBuffer { + fn collect(items: &[T], arena: ArenaPtr) -> Self { + let mut bytes = Vec::::new(); + let mut offsets = Vec::with_capacity(items.len()); let mut tmp = String::with_capacity(64); - for item in items.iter() { - let start = path_buf.len(); + for item in items { + let start = bytes.len(); item.write_relative_path(arena, &mut tmp); - path_buf.extend_from_slice(tmp.as_bytes()); + bytes.extend_from_slice(tmp.as_bytes()); #[cfg(windows)] - for b in &mut path_buf[start..] { + for b in &mut bytes[start..] { if *b == b'\\' { *b = b'/'; } } - offsets.push((start, path_buf.len() - start)); + offsets.push((start, bytes.len() - start)); } - let path_refs: Vec<&str> = offsets - .iter() - .map(|&(off, len)| unsafe { std::str::from_utf8_unchecked(&path_buf[off..off + len]) }) - .collect(); - precompute_glob_matches(&other_constraints, &path_refs) - } else { - Vec::new() - }; - - let filtered: Vec<&T> = if items.len() >= PAR_THRESHOLD { - use rayon::prelude::*; - items - .par_iter() - .enumerate() - .map_init( - || (String::with_capacity(64), String::with_capacity(64)), - |(fname_buf, path_buf), (i, item)| { - if !extensions.is_empty() { - item.write_file_name(arena, fname_buf); - if !extensions - .iter() - .any(|ext| file_has_extension(fname_buf, ext)) - { - return None; - } - } - - let mut glob_idx = 0; - if other_constraints.iter().all(|constraint| { - item_matches_constraint_at_index( - item, - i, - constraint, - &glob_results, - &mut glob_idx, - false, - arena, - fname_buf, - path_buf, - ) - }) { - Some(item) - } else { - None - } - }, - ) - .flatten() - .collect() - } else { - let mut fname_buf = String::with_capacity(64); - let mut path_buf = String::with_capacity(64); + Self { bytes, offsets } + } - items + 
fn as_strs(&self) -> Vec<&str> { + self.offsets .iter() - .enumerate() - .filter(|&(i, item)| { - if !extensions.is_empty() { - item.write_file_name(arena, &mut fname_buf); - if !extensions - .iter() - .any(|ext| file_has_extension(&fname_buf, ext)) - { - return false; - } - } - - let mut glob_idx = 0; - other_constraints.iter().all(|constraint| { - item_matches_constraint_at_index( - item, - i, - constraint, - &glob_results, - &mut glob_idx, - false, - arena, - &mut fname_buf, - &mut path_buf, - ) - }) + .map(|&(off, len)| unsafe { + std::str::from_utf8_unchecked(&self.bytes[off..off + len]) }) - .map(|(_, item)| item) .collect() - }; + } +} - Some(filtered) +fn precompute_masks(rest: &[&Constraint<'_>], paths: &[&str]) -> Vec> { + let mut out = Vec::new(); + for c in rest { + walk_globs(c, &mut |pattern| { + out.push(match_glob_pattern(pattern, paths)) + }); + } + out } -fn precompute_glob_matches<'a>( - constraints: &[&Constraint<'a>], - paths: &[&str], -) -> Vec<(bool, AHashSet)> { - let mut results = Vec::new(); - for constraint in constraints { - collect_glob_indices(constraint, paths, &mut results, false); +fn compile_globs(rest: &[&Constraint<'_>]) -> Vec> { + let mut out = Vec::new(); + for c in rest { + walk_globs(c, &mut |pattern| out.push(compile_one(pattern))); } - results + out } -fn collect_glob_indices<'a>( - constraint: &Constraint<'a>, - paths: &[&str], - results: &mut Vec<(bool, AHashSet)>, - _is_negated: bool, -) { - match constraint { - Constraint::Glob(pattern) => { - let indices = match_glob_pattern(pattern, paths); - // Negation is handled by the `negate` parameter in - // `item_matches_constraint_at_index`, NOT here. Storing - // `is_negated=true` caused a double-negation bug when the - // Glob arm also applied `negate`. - results.push((false, indices)); - } - Constraint::Not(inner) => { - collect_glob_indices(inner, paths, results, true); - } +/// Visit every Glob (including ones nested under Not) in constraint walk order. 
+/// Order matters: `glob_idx` in the per-item evaluator increments by one per Glob node. +fn walk_globs(c: &Constraint<'_>, f: &mut F) { + match c { + Constraint::Glob(p) => f(p), + Constraint::Not(inner) => walk_globs(inner, f), _ => {} } } -/// Match a glob pattern against a list of paths, returning the set of matching indices. -/// -/// When the `zlob` feature is enabled, delegates to `zlob::zlob_match_paths` (Zig-compiled -/// C library, fastest). Otherwise falls back to `globset::Glob` (pure Rust). #[cfg(feature = "zlob")] -fn match_glob_pattern(pattern: &str, paths: &[&str]) -> AHashSet { - let Ok(Some(matches)) = zlob::zlob_match_paths(pattern, paths, zlob::ZlobFlags::RECOMMENDED) - else { - return AHashSet::new(); - }; +fn compile_one(pattern: &str) -> Option { + zlob::ZlobPattern::compile(pattern, zlob::ZlobFlags::RECOMMENDED).ok() +} - let matched_set: AHashSet = matches.iter().map(|s| s.as_ptr() as usize).collect(); +#[cfg(not(feature = "zlob"))] +fn compile_one(pattern: &str) -> Option { + globset::Glob::new(pattern) + .ok() + .map(|g| g.compile_matcher()) +} - if paths.len() >= PAR_THRESHOLD { - use rayon::prelude::*; - paths - .par_iter() - .enumerate() - .filter(|(_, p)| matched_set.contains(&(p.as_ptr() as usize))) - .map(|(i, _)| i) - .collect::>() - .into_iter() - .collect() - } else { - paths - .iter() - .enumerate() - .filter(|(_, p)| matched_set.contains(&(p.as_ptr() as usize))) - .map(|(i, _)| i) - .collect() +/// Build a `paths.len()`-sized bitmap. Vec beats AHashSet ~2× in the per-item +/// filter loop — no hashing, plain array indexing, sequential prefetcher-friendly. 
+#[cfg(feature = "zlob")] +fn match_glob_pattern(pattern: &str, paths: &[&str]) -> Vec { + let mut mask = vec![false; paths.len()]; + let Ok(hits) = zlob::zlob_match_paths_indices(pattern, paths, zlob::ZlobFlags::RECOMMENDED) + else { + return mask; + }; + for i in hits.to_iter() { + if i < mask.len() { + mask[i] = true; + } } + mask } #[cfg(not(feature = "zlob"))] -fn match_glob_pattern(pattern: &str, paths: &[&str]) -> AHashSet { +fn match_glob_pattern(pattern: &str, paths: &[&str]) -> Vec { + let mut mask = vec![false; paths.len()]; let Ok(glob) = globset::Glob::new(pattern) else { - return AHashSet::new(); + return mask; }; let matcher = glob.compile_matcher(); - if paths.len() >= PAR_THRESHOLD { use rayon::prelude::*; - paths - .par_iter() - .enumerate() - .filter(|(_, p)| matcher.is_match(p)) - .map(|(i, _)| i) - .collect::>() - .into_iter() - .collect() + mask.par_iter_mut() + .zip(paths.par_iter()) + .for_each(|(slot, p)| *slot = matcher.is_match(p)); } else { - paths - .iter() - .enumerate() - .filter(|(_, p)| matcher.is_match(p)) - .map(|(i, _)| i) - .collect() + for (slot, p) in mask.iter_mut().zip(paths.iter()) { + *slot = matcher.is_match(p); + } } + mask } #[cfg(test)] @@ -777,4 +900,79 @@ mod tests { "h file should be included" ); } + + #[test] + fn test_inline_glob_path_matches_prepass() { + // Mixed (extensions + glob) takes the inline-compiled path. + // Pure glob takes the prepass bitmap path. Both must give identical results. 
+ let arena_ptr = ArenaPtr(std::ptr::null()); + let items = vec![ + TestItem { + relative_path: "src/main.rs", + file_name: "main.rs", + }, + TestItem { + relative_path: "src/lib.ts", + file_name: "lib.ts", + }, + TestItem { + relative_path: "tests/foo.rs", + file_name: "foo.rs", + }, + TestItem { + relative_path: "docs/readme.md", + file_name: "readme.md", + }, + ]; + + let mixed = vec![Constraint::Extension("rs"), Constraint::Glob("src/**")]; + let mixed_paths: Vec<&str> = apply_constraints(&items, &mixed, arena_ptr) + .unwrap() + .iter() + .map(|i| i.relative_path) + .collect(); + assert_eq!(mixed_paths, vec!["src/main.rs"]); + + let pure_glob = vec![Constraint::Glob("src/**")]; + let glob_paths: Vec<&str> = apply_constraints(&items, &pure_glob, arena_ptr) + .unwrap() + .iter() + .map(|i| i.relative_path) + .collect(); + assert!(glob_paths.contains(&"src/main.rs")); + assert!(glob_paths.contains(&"src/lib.ts")); + assert_eq!(glob_paths.len(), 2); + } + + #[test] + fn test_inline_negated_glob_with_extension() { + // Mixed Not(Glob) on inline path — exercise the negate=true branch in + // glob_matches_inline through the Not->Glob recursion. 
+ let arena_ptr = ArenaPtr(std::ptr::null()); + let items = vec![ + TestItem { + relative_path: "src/main.rs", + file_name: "main.rs", + }, + TestItem { + relative_path: "vendor/foo.rs", + file_name: "foo.rs", + }, + TestItem { + relative_path: "vendor/foo.ts", + file_name: "foo.ts", + }, + ]; + + let constraints = vec![ + Constraint::Extension("rs"), + Constraint::Not(Box::new(Constraint::Glob("vendor/**"))), + ]; + let paths: Vec<&str> = apply_constraints(&items, &constraints, arena_ptr) + .unwrap() + .iter() + .map(|i| i.relative_path) + .collect(); + assert_eq!(paths, vec!["src/main.rs"]); + } } diff --git a/crates/fff-core/src/file_picker.rs b/crates/fff-core/src/file_picker.rs index edbee5e0..745e34cc 100644 --- a/crates/fff-core/src/file_picker.rs +++ b/crates/fff-core/src/file_picker.rs @@ -442,6 +442,12 @@ pub struct FilePickerOptions { pub watch: bool, /// Follow symbolic links during file indexing. pub follow_symlinks: bool, + /// Allow indexing the filesystem root (`/`). Off by default — these dirs + /// generate enormous fs-event traffic and are rarely the intended target. + pub enable_fs_root_scanning: bool, + /// Allow indexing the user's home directory. Off by default for the same + /// reason as `enable_fs_root_scanning`. 
+ pub enable_home_dir_scanning: bool, } impl Default for FilePickerOptions { @@ -454,6 +460,8 @@ impl Default for FilePickerOptions { cache_budget: None, watch: true, follow_symlinks: false, + enable_fs_root_scanning: false, + enable_home_dir_scanning: false, } } } @@ -471,6 +479,8 @@ pub struct FilePicker { enable_content_indexing: bool, watch: bool, follow_symlinks: bool, + enable_fs_root_scanning: bool, + enable_home_dir_scanning: bool, } impl std::fmt::Debug for FilePicker { @@ -528,6 +538,14 @@ impl FilePicker { self.follow_symlinks } + pub fn fs_root_scanning_enabled(&self) -> bool { + self.enable_fs_root_scanning + } + + pub fn home_dir_scanning_enabled(&self) -> bool { + self.enable_home_dir_scanning + } + pub fn mode(&self) -> FFFMode { self.mode } @@ -695,10 +713,16 @@ impl FilePicker { error!("Base path does not exist: {}", options.base_path); return Err(Error::InvalidPath(path)); } - if path.parent().is_none() { + if path.parent().is_none() && !options.enable_fs_root_scanning { error!("Refusing to index filesystem root: {}", path.display()); return Err(Error::FilesystemRoot(path)); } + if !options.enable_home_dir_scanning + && Some(path.as_os_str()) == dirs::home_dir().as_ref().map(|p| p.as_os_str()) + { + error!("Refusing to index home directory: {}", path.display()); + return Err(Error::FilesystemRoot(path)); + } // Windows-only: canonicalize with so the base path does NOT // have the `\\?\` UNC prefix that `std::fs::canonicalize` adds. 
@@ -722,6 +746,8 @@ impl FilePicker { enable_content_indexing: options.enable_content_indexing, watch: options.watch, follow_symlinks: options.follow_symlinks, + enable_fs_root_scanning: options.enable_fs_root_scanning, + enable_home_dir_scanning: options.enable_home_dir_scanning, }) } @@ -747,6 +773,8 @@ impl FilePicker { let watch = picker.watch; let mode = picker.mode; let follow_symlinks = picker.follow_symlinks; + let enable_fs_root_scanning = picker.enable_fs_root_scanning; + let enable_home_dir_scanning = picker.enable_home_dir_scanning; let signals = picker.scan_signals(); let scanned_files_counter = picker.scanned_files_counter(); @@ -774,6 +802,8 @@ impl FilePicker { auto_cache_budget: true, install_watcher: true, follow_symlinks, + enable_fs_root_scanning, + enable_home_dir_scanning, }, ) .spawn(); @@ -851,6 +881,8 @@ impl FilePicker { shared_picker.clone(), shared_frecency.clone(), self.mode, + self.enable_fs_root_scanning, + self.enable_home_dir_scanning, )?; self.background_watcher = Some(watcher); self.signals.watcher_ready.store(true, Ordering::Release); @@ -1148,6 +1180,32 @@ impl FilePicker { } } + /// Glob search: filter indexed files by a single glob pattern, rank by + /// frecency, and paginate. Bypasses the regular query parser entirely — + /// useful when callers already have a literal glob (`*.rs`, `**/*.test.ts`) + /// and want neither fuzzy matching nor multi-token constraint parsing. + /// + /// Pipeline: `apply_constraints(Glob) → score_filtered_by_frecency → sort_and_paginate`. + /// Same ranking semantics as `fuzzy_search` when the fuzzy query is empty. 
+ pub fn glob<'p>( + &'p self, + pattern: &'p str, + options: FuzzySearchOptions<'p>, + ) -> SearchResult<'p> { + let query = FFFQuery { + raw_query: pattern, + constraints: vec![fff_query_parser::Constraint::Glob(pattern)], + fuzzy_query: fff_query_parser::FuzzyQuery::Empty, + location: None, + }; + + // `fuzzy_search` short-circuits to `score_filtered_by_frecency` when + // `fuzzy_query` is `Empty`, then runs the same `sort_and_paginate` + // path. Reusing it keeps the ranking guarantees identical without + // exposing the private scoring helpers. + self.fuzzy_search(&query, None, options) + } + /// Perform a live grep search across indexed files. /// /// If `options.abort_signal` is set it overrides the picker's internal diff --git a/crates/fff-core/src/grep.rs b/crates/fff-core/src/grep.rs index c011c04e..f4620b1d 100644 --- a/crates/fff-core/src/grep.rs +++ b/crates/fff-core/src/grep.rs @@ -1,14 +1,8 @@ -//! High-performance grep engine for live content search. -//! -//! Searches file contents using the `grep-searcher` crate with mmap-backed -//! file access. Files are searched in frecency order for optimal pagination -//! performance — the most relevant files are searched first, enabling early -//! termination once enough results are collected. 
- use crate::{ BigramFilter, BigramOverlay, bigram_query::{fuzzy_to_bigram_query, regex_to_bigram_query}, - constraints::apply_constraints, + case_insensitive_memmem, + constraints::{ConstraintPlan, ConstraintsBuffers}, extract_bigrams, sort_buffer::sort_with_buffer, types::{ContentCacheBudget, FileItem, FileSliceExt, MmapSlot}, @@ -1012,41 +1006,37 @@ pub(crate) fn multi_grep_search<'a>( None }; - let (mut files_to_search, mut filtered_file_count) = - prepare_files_to_search(files, constraints, options, arena); + let base_file_count = match bigram_overlay { + Some(bigram_overlay) => bigram_overlay.base_file_count(), + None => files.len(), + }; + + let (mut files_to_search, mut filtered_file_count) = prefilter_files( + files, + constraints, + bigram_candidates.as_deref(), + base_file_count, + options, + arena, + ); // If constraints yielded 0 files and we had FilePath constraints, // retry without them (the path token was likely part of the search text). if files_to_search.is_empty() - && let Some(stripped) = strip_file_path_constraints(constraints) + && let Some(stripped) = strip_file_path_constraint_if_present(constraints) { - let (retry_files, retry_count) = prepare_files_to_search(files, &stripped, options, arena); + let (retry_files, retry_count) = prefilter_files( + files, + &stripped, + bigram_candidates.as_deref(), + base_file_count, + options, + arena, + ); files_to_search = retry_files; filtered_file_count = retry_count; } - // Apply bigram prefilter to the file list. Bigram columns only cover - // the indexable region — `bigram_overlay.base_file_count()` is the - // authoritative boundary. Files past it (unindexable base, overflow) - // are always retained and searched directly. 
- if let Some(ref candidates) = bigram_candidates { - let base_ptr = files.as_ptr(); - let bigram_boundary = bigram_overlay - .map(|o| o.base_file_count()) - .unwrap_or(files.len()); - - files_to_search.retain(|f| { - if f.is_overflow() { - return true; - } - let file_idx = unsafe { (*f as *const FileItem).offset_from(base_ptr) as usize }; - if file_idx >= bigram_boundary { - return true; - } - BigramFilter::is_candidate(candidates, file_idx) - }); - } - if files_to_search.is_empty() { return GrepResult { total_files, @@ -1194,14 +1184,11 @@ fn char_indices_to_byte_offsets(line: &str, char_indices: &[usize]) -> SmallVec< result } -use crate::case_insensitive_memmem; - -/// Minimum chunk size for paginated search. Must be large enough for good -/// thread utilization across rayon's pool (~28 threads on modern hardware) -/// but small enough to allow early termination after few chunks. -const PAGINATED_CHUNK_SIZE: usize = 512; - -#[tracing::instrument(skip_all, level = Level::DEBUG, fields(prefiltered_count = files_to_search.len()))] +#[tracing::instrument( + skip_all, + level = Level::DEBUG, + fields(prefiltered_count = files_to_search.len()) +)] fn perform_grep<'a, F>( files_to_search: &[&'a FileItem], options: &GrepSearchOptions, @@ -1221,27 +1208,12 @@ where let page_limit = options.page_limit; let budget_exceeded = AtomicBool::new(false); - // For paginated searches, process files in chunks to enable early - // termination. Each chunk is searched in parallel with rayon; between - // chunks we check whether enough matches have been collected. - // - // For full searches (page_limit = MAX), one chunk = all files — same - // throughput as before, no overhead from the chunking loop. - // - // For common queries ("x", "if") with ~99% hit rate: the first 512-file - // chunk yields ~500 matches, far exceeding page_limit=50. We stop after - // one chunk (~1ms) instead of searching all 93K files (~175ms). 
- let chunk_size = if page_limit < usize::MAX { - PAGINATED_CHUNK_SIZE - } else { - files_to_search.len().max(1) - }; - let mut result_files: Vec<&'a FileItem> = Vec::new(); let mut all_matches: Vec = Vec::new(); let mut files_consumed: usize = 0; let mut page_filled = false; + let chunk_size = rayon::current_num_threads() * 4; for chunk in files_to_search.chunks(chunk_size) { let chunk_offset = files_consumed; @@ -1249,12 +1221,10 @@ where .par_iter() .enumerate() .map_init( - // Per-thread scratch: a reusable read buffer for small files - // and an mmap slot for cache-miss large files (≥ FRESH_MMAP_THRESHOLD). - || { - tracing::info!("LMAOTHREAD"); - (Vec::with_capacity(64 * 1024), MmapSlot::default()) - }, + // tested it out a few times, this is just fine for rayon worker in this specific + // case it doesn't reallocate this many times and it is actually faster than using + // scoped threads with a predefined local scratch buffers because of spawn cost + || (Vec::with_capacity(64 * 1024), MmapSlot::default()), |(buf, mmap_slot), (local_idx, file)| { if ctx.abort_signal.load(Ordering::Relaxed) { budget_exceeded.store(true, Ordering::Relaxed); @@ -1436,73 +1406,134 @@ fn collect_grep_results<'a>( } } -/// Filter files by constraints and size/binary checks, sort by frecency, -/// and apply file-based pagination. -/// -/// Returns `(paginated_files, filtered_file_count)`. The paginated slice -/// is empty if the offset is past the end of available files. 
-fn prepare_files_to_search<'a>( +/// Single pass prefilter that doesn't involve file reading +/// allocates only amount of memory required for storing references of the FileItems have to be +/// opened for grepping unaviodably, in the worst case allocates N * memory if no prefilter needed +fn prefilter_files<'a>( files: &'a [FileItem], constraints: &[fff_query_parser::Constraint<'_>], + bigram_candidates: Option<&[u64]>, + base_count: usize, options: &GrepSearchOptions, arena: crate::simd_path::ArenaPtr, ) -> (Vec<&'a FileItem>, usize) { - let prefiltered: Vec<&FileItem> = if constraints.is_empty() { - files - .iter() - .filter(|f| { - !f.is_deleted() && !f.is_binary() && f.size > 0 && f.size <= options.max_file_size - }) - .collect() + let max_file_size = options.max_file_size; + let plan = if constraints.is_empty() { + None } else { - match apply_constraints(files, constraints, arena) { - Some(constrained) => constrained - .into_iter() - .filter(|f| { - !f.is_deleted() - && !f.is_binary() - && f.size > 0 - && f.size <= options.max_file_size - }) - .collect(), - None => files - .iter() - .filter(|f| { - !f.is_deleted() - && !f.is_binary() - && f.size > 0 - && f.size <= options.max_file_size - }) - .collect(), + Some(ConstraintPlan::build(constraints, files, arena)) + }; + + let mut scratch = ConstraintsBuffers::new(); + + #[inline(always)] + fn basic_prefilter(file: &FileItem, max: u64) -> bool { + !file.is_deleted() && !file.is_binary() && file.size > 0 && file.size <= max + } + + // squeeze as much prefilters into a single loop as possible + let mut prefiltered: Vec<&FileItem> = match bigram_candidates { + Some(candidates) => { + let boundary = base_count.min(files.len()); + let (indexed, tail) = files.split_at(boundary); + + let cap = BigramFilter::count_candidates(candidates) + tail.len(); + let mut out: Vec<&FileItem> = Vec::with_capacity(cap); + + let full_words = boundary / 64; + let last_word_bits = boundary % 64; + + // we need this because we already 
had a regression of the wrong bit + // has been set for the very last word based on the overlay, it's pretty cheap + macro_rules! evaluate_bigram_match_word { + ($word:expr, $base:expr) => {{ + let mut bits: u64 = $word; + while bits != 0 { + let bit = bits.trailing_zeros() as usize; + let file_idx = $base + bit; + bits &= bits - 1; + + let f = unsafe { indexed.get_unchecked(file_idx) }; + if !basic_prefilter(f, max_file_size) { + continue; + } + if let Some(plan) = plan.as_ref() + && !plan.matches(f, file_idx, arena, &mut scratch) + { + continue; + } + out.push(f); + } + }}; + } + + // Full words: every set bit guaranteed `< boundary`. + for (word_idx, &word) in candidates.iter().take(full_words).enumerate() { + if word != 0 { + evaluate_bigram_match_word!(word, word_idx * 64); + } + } + + // Last partial word: mask bits past `boundary` once at word load. + if last_word_bits != 0 { + // this will get only (mod 64) bits from the last word guaratee that it's 0 padded + let last_mask: u64 = (1u64 << last_word_bits) - 1; + let word = candidates[full_words] & last_mask; + if word != 0 { + evaluate_bigram_match_word!(word, full_words * 64); + } + } + + // Sequential processing for non-bigrammable files: they are always in the end + for (offset, f) in tail.iter().enumerate() { + if !basic_prefilter(f, max_file_size) { + continue; + } + if let Some(ref p) = plan + && !p.matches(f, boundary + offset, arena, &mut scratch) + { + continue; + } + out.push(f); + } + + out + } + // this will be executed if there is no bigram, in the worst case it will allocate + // whole array of files but probability in the real repo of NO preflter working is so + // low that we just ignore that, usually there would be at least a few files excluded + None => { + let mut out: Vec<&FileItem> = Vec::new(); + for (idx, f) in files.iter().enumerate() { + if !basic_prefilter(f, max_file_size) { + continue; + } + if let Some(ref p) = plan + && !p.matches(f, idx, arena, &mut scratch) + { + continue; + 
} + out.push(f); + } + out } }; let total_count = prefiltered.len(); - let mut sorted_files = prefiltered; - // Only sort when there is meaningful frecency or modification data to rank by. - // On large repos (500k+ files) with no frecency data (fresh session, benchmark), - // skipping the O(n log n) sort saves ~200ms per query. - let needs_sort = sorted_files - .iter() - .any(|f| f.total_frecency_score() != 0 || f.modified != 0); - - if needs_sort { - sort_with_buffer(&mut sorted_files, |a, b| { - b.total_frecency_score() - .cmp(&a.total_frecency_score()) - .then(b.modified.cmp(&a.modified)) - }); - } + sort_with_buffer(&mut prefiltered, |a, b| { + b.total_frecency_score() + .cmp(&a.total_frecency_score()) + .then(b.modified.cmp(&a.modified)) + }); if options.file_offset > 0 && options.file_offset < total_count { - let paginated = sorted_files.split_off(options.file_offset); + let paginated = prefiltered.split_off(options.file_offset); (paginated, total_count) } else if options.file_offset >= total_count { (Vec::new(), total_count) } else { - // offset == 0: no split needed, return as-is - (sorted_files, total_count) + (prefiltered, total_count) } } @@ -1521,19 +1552,6 @@ fn prepare_files_to_search<'a>( /// the *reference* (scalar) smith-waterman, which is O(needle × line_len) /// per line. For a 10k-line file that's 10k sequential reference calls. /// -/// `neo_frizbee::match_list` solves this by batching lines into -/// fixed-width SIMD buckets (4, 8, 12 … 512 bytes) and scoring 16+ -/// haystacks per SIMD invocation. A single `match_list` call over the -/// entire file replaces 10k individual `match_indices` calls. We then -/// call `match_indices` *only* on the ~5-20 lines that pass `min_score` -/// to extract character highlight positions. -/// -/// Line splitting uses `memchr::memchr` (the same SIMD-accelerated byte -/// search that `grep-searcher` and `bstr::ByteSlice::find_byte` use -/// internally) to locate `\n` terminators. 
This gives us the same -/// performance as the searcher's `LineStep` iterator without pulling in -/// the full searcher machinery. -/// /// For each file: /// 1. mmap the file, split lines via memchr '\n' (tracking line numbers + byte offsets) /// 2. Batch all lines through `match_list` (SIMD smith-waterman) @@ -1671,6 +1689,7 @@ fn fuzzy_grep_search<'a>( } else { arena }; + let file_bytes = file.get_content_for_search(buf, mmap_slot, file_arena, base_path, budget)?; @@ -1924,31 +1943,10 @@ pub(crate) fn grep_search<'a>( let regex = match options.mode { GrepMode::PlainText => None, GrepMode::Fuzzy => { - let (mut files_to_search, mut filtered_file_count) = - prepare_files_to_search(files, constraints_from_query, options, arena); - - if files_to_search.is_empty() - && let Some(stripped) = strip_file_path_constraints(constraints_from_query) - { - let (retry_files, retry_count) = - prepare_files_to_search(files, &stripped, options, arena); - files_to_search = retry_files; - filtered_file_count = retry_count; - } - - if files_to_search.is_empty() { - return GrepResult { - total_files, - filtered_file_count, - next_file_offset: 0, - ..Default::default() - }; - } - // Bigram prefilter: pick 5 evenly-spaced probe bigrams, require // (5 - max_typos) of them to appear. Widely-spaced probes are // far more selective than sliding windows of adjacent bigrams. - if let Some(idx) = bigram_index + let bigram_candidates = if let Some(idx) = bigram_index && idx.is_ready() { let bq = fuzzy_to_bigram_query(&grep_text, 7); @@ -1967,33 +1965,52 @@ pub(crate) fn grep_search<'a>( } } } + Some(candidates) + } else { + None + } + } else { + None + }; - // Bigram columns only cover the indexable region at - // `files[..overlay.base_file_count()]` — which equals - // `indexable_count` today. Files past that boundary - // (unindexable base files, overflow) are not tracked by - // the bigram filter, so we always retain them and let - // the full text search decide. 
- let base_ptr = files.as_ptr(); - let bigram_boundary = bigram_overlay - .map(|o| o.base_file_count()) - .unwrap_or(files.len()); - - files_to_search.retain(|f| { - if f.is_overflow() { - return true; - } + let base_count = match bigram_overlay { + Some(bigram_overlay) => bigram_overlay.base_file_count(), + None => files.len(), + }; - let file_idx = - unsafe { (*f as *const FileItem).offset_from(base_ptr) as usize }; + let (mut files_to_search, mut filtered_file_count) = prefilter_files( + files, + constraints_from_query, + bigram_candidates.as_deref(), + base_count, + options, + arena, + ); - if file_idx >= bigram_boundary { - return true; - } + if files_to_search.is_empty() + && let Some(stripped) = + strip_file_path_constraint_if_present(constraints_from_query) + { + let (retry_files, retry_count) = prefilter_files( + files, + &stripped, + bigram_candidates.as_deref(), + base_count, + options, + arena, + ); - BigramFilter::is_candidate(&candidates, file_idx) - }); - } + files_to_search = retry_files; + filtered_file_count = retry_count; + } + + if files_to_search.is_empty() { + return GrepResult { + total_files, + filtered_file_count, + next_file_offset: 0, + ..Default::default() + }; } return fuzzy_grep_search( @@ -2086,108 +2103,36 @@ pub(crate) fn grep_search<'a>( None }; - // Overflow files (added after the bigram index was built) are not in - // the candidate bitset. They're few by definition, so just search all - // of them directly via memchr — no bigram tracking needed. - let overflow_start = bigram_overlay + // Bigram bitset only covers `files[..bigram_boundary]`. Overflow + unindexable + // tail files past the boundary are always retained — `prefilter_files` walks them + // via the linear sweep after the bitset walk. 
+ let bigram_boundary = bigram_overlay .map(|o| o.base_file_count()) .unwrap_or(files.len()); - // it is important that this step is coming as early as possible - let (files_to_search, filtered_file_count) = match bigram_candidates { - Some(ref candidates) if constraints_from_query.is_empty() => { - // this call is essentially free and much more efficient than allowing a recollection - let overflow_count = files.len().saturating_sub(overflow_start); - let cap = BigramFilter::count_candidates(candidates) + overflow_count; - let mut result: Vec<&FileItem> = Vec::with_capacity(cap); - - for (word_idx, &word) in candidates.iter().enumerate() { - if word == 0 { - continue; - } - let base = word_idx * 64; - let mut bits = word; - while bits != 0 { - let bit = bits.trailing_zeros() as usize; - let file_idx = base + bit; - // Stop at the overflow boundary: the loop below walks - // every overflow file, so counting them here too would duplicate. - if file_idx < overflow_start { - let f = unsafe { files.get_unchecked(file_idx) }; - if !f.is_binary() && f.size <= options.max_file_size { - result.push(f); - } - } - bits &= bits - 1; - } - } - - // Append all overflow files — they're not in the bigram index - // so we search them unconditionally (typically few files). - for f in &files[overflow_start..] 
{ - if !f.is_binary() && !f.is_deleted() && f.size <= options.max_file_size { - result.push(f); - } - } - - let total_searchable = files.len(); - let needs_sort = result - .iter() - .any(|f| f.total_frecency_score() != 0 || f.modified != 0); - - if needs_sort { - sort_with_buffer(&mut result, |a, b| { - b.total_frecency_score() - .cmp(&a.total_frecency_score()) - .then(b.modified.cmp(&a.modified)) - }); - } - - if options.file_offset > 0 && options.file_offset < result.len() { - let paginated = result.split_off(options.file_offset); - (paginated, total_searchable) - } else if options.file_offset >= result.len() { - (Vec::new(), total_searchable) - } else { - (result, total_searchable) - } - } - _ => { - let (mut fts, mut fc) = - prepare_files_to_search(files, constraints_from_query, options, arena); - - if fts.is_empty() - && let Some(stripped) = strip_file_path_constraints(constraints_from_query) - { - let (retry_files, retry_count) = - prepare_files_to_search(files, &stripped, options, arena); - fts = retry_files; - fc = retry_count; - } - - if let Some(ref candidates) = bigram_candidates { - let base_ptr = files.as_ptr(); - fts.retain(|f| { - if f.is_overflow() { - return true; - } - - let file_idx = - unsafe { (*f as *const FileItem).offset_from(base_ptr) as usize }; - - // Files past the bigram boundary (unindexable base files) - // are not tracked by the bigram filter — always search them. 
- if file_idx >= overflow_start { - return true; - } - - BigramFilter::is_candidate(candidates, file_idx) - }); - } + let (mut files_to_search, mut filtered_file_count) = prefilter_files( + files, + constraints_from_query, + bigram_candidates.as_deref(), + bigram_boundary, + options, + arena, + ); - (fts, fc) - } - }; + if files_to_search.is_empty() + && let Some(stripped) = strip_file_path_constraint_if_present(constraints_from_query) + { + let (retry_files, retry_count) = prefilter_files( + files, + &stripped, + bigram_candidates.as_deref(), + bigram_boundary, + options, + arena, + ); + files_to_search = retry_files; + filtered_file_count = retry_count; + } if files_to_search.is_empty() { return GrepResult { @@ -2273,7 +2218,7 @@ pub fn parse_grep_query(query: &str) -> FFFQuery<'_> { parser.parse(query) } -fn strip_file_path_constraints<'a>( +fn strip_file_path_constraint_if_present<'a>( constraints: &[Constraint<'a>], ) -> Option> { if !constraints diff --git a/crates/fff-core/src/scan.rs b/crates/fff-core/src/scan.rs index c861772c..d13bc020 100644 --- a/crates/fff-core/src/scan.rs +++ b/crates/fff-core/src/scan.rs @@ -40,6 +40,8 @@ pub(crate) struct ScanConfig { pub(crate) auto_cache_budget: bool, pub(crate) install_watcher: bool, pub(crate) follow_symlinks: bool, + pub(crate) enable_fs_root_scanning: bool, + pub(crate) enable_home_dir_scanning: bool, } /// A fully-configured scan job ready to run on a background thread. 
@@ -89,6 +91,8 @@ impl ScanJob { auto_cache_budget: !picker.has_explicit_cache_budget(), install_watcher: false, // the watcher is independent of rescan, it is not restarting EVER follow_symlinks: picker.follows_symlinks(), + enable_fs_root_scanning: picker.fs_root_scanning_enabled(), + enable_home_dir_scanning: picker.home_dir_scanning_enabled(), }; drop(guard); // just a sanity check @@ -243,6 +247,8 @@ impl ScanJob { shared_picker.clone(), shared_frecency.clone(), mode, + config.enable_fs_root_scanning, + config.enable_home_dir_scanning, ) { Ok(watcher) => { if let Ok(mut guard) = shared_picker.write() diff --git a/crates/fff-grep/src/lines.rs b/crates/fff-grep/src/lines.rs index 6b4fb1fc..0d7fabf5 100644 --- a/crates/fff-grep/src/lines.rs +++ b/crates/fff-grep/src/lines.rs @@ -68,8 +68,7 @@ pub fn count(bytes: &[u8], line_term: u8) -> u64 { memchr::memchr_iter(line_term, bytes).count() as u64 } -/// Given a line that possibly ends with a terminator, return that line without -/// the terminator. +/// Given a line that possibly ends with a terminator, return that line without the terminator. #[inline(always)] pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] { let line_term = line_term.as_bytes(); diff --git a/crates/fff-mcp/src/main.rs b/crates/fff-mcp/src/main.rs index 314cfa5d..40c0b461 100644 --- a/crates/fff-mcp/src/main.rs +++ b/crates/fff-mcp/src/main.rs @@ -270,6 +270,7 @@ async fn main() -> Result<(), Box> { .max_cached_files .map(fff::ContentCacheBudget::new_for_repo), follow_symlinks: false, + ..Default::default() }, ) .map_err(|e| format!("Failed to init file picker: {}", e))?; diff --git a/packages/fff-bun/examples/glob-bench.ts b/packages/fff-bun/examples/glob-bench.ts new file mode 100644 index 00000000..b8922e40 --- /dev/null +++ b/packages/fff-bun/examples/glob-bench.ts @@ -0,0 +1,159 @@ +#!/usr/bin/env bun +/** + * Glob benchmark: fff.glob vs Bun.Glob vs npm `glob`. 
+ * + * Each engine is asked to enumerate files in a directory matching the same + * pattern. We measure wall-clock time + result count. fff scans + indexes + * once on init; the subsequent glob call is a filter over the in-memory + * index — that's what we time. + * + * Usage: + * bun examples/glob-bench.ts [dir] [pattern] [iterations] + * + * dir default: cwd + * pattern default: "**\/lua/**\/*.lua" + * iterations default: 5 (each engine runs N times, best/median/worst reported) + * + * Install npm glob first: + * bun add glob + */ + +import { performance } from "node:perf_hooks"; +import { resolve } from "node:path"; +import { Glob as BunGlob } from "bun"; +import { FileFinder } from "../src/index"; + +// npm glob — optional. Skipped with a warning if not installed. +let npmGlob: + | ((pattern: string, opts: { cwd: string }) => Promise) + | null = null; +try { + const mod: { + glob: (pattern: string, opts: { cwd: string }) => Promise; + } = + // @ts-ignore - optional peer; resolved at runtime, may be absent + await import("glob"); + npmGlob = mod.glob; +} catch { + console.warn("npm `glob` not installed — skipping. Run: bun add glob"); +} + +const dir = resolve(process.argv[2] ?? process.cwd()); +const pattern = process.argv[3] ?? "**/lua/**/*.lua"; +const iterations = Number(process.argv[4] ?? 5); + +console.log(`dir: ${dir}`); +console.log(`pattern: ${pattern}`); +console.log(`iterations: ${iterations}\n`); + +interface Sample { + ms: number; + count: number; +} + +function summarize(label: string, samples: Sample[]): void { + if (samples.length === 0) { + console.log(`${label.padEnd(16)} skipped`); + return; + } + const sorted = [...samples].sort((a, b) => a.ms - b.ms); + const best = sorted[0]!; + const median = sorted[Math.floor(sorted.length / 2)]!; + const worst = sorted[sorted.length - 1]!; + const counts = new Set(samples.map((s) => s.count)); + const countStr = + counts.size === 1 ? 
`${best.count}` : `[${[...counts].join(", ")}]`; + console.log( + `${label.padEnd(16)} best=${best.ms.toFixed(2)}ms median=${median.ms.toFixed(2)}ms worst=${worst.ms.toFixed(2)}ms count=${countStr}`, + ); +} + +async function bench( + fn: () => Promise | T, +): Promise<{ ms: number; result: T }> { + const start = performance.now(); + const result = await fn(); + return { ms: performance.now() - start, result }; +} + +// --------------------------------------------------------------------------- +// fff: init + warm scan, then time only the .glob() call. Init cost is +// reported separately because it's amortized across many subsequent calls. +// --------------------------------------------------------------------------- +const fffInit = await bench(() => { + const result = FileFinder.create({ + basePath: dir, + disableMmapCache: true, + disableContentIndexing: true, + disableWatch: true, + }); + if (!result.ok) throw new Error(result.error); + return result.value; +}); +const finder = fffInit.result; + +// Wait until initial scan done so the first .glob() doesn't see a partial +// index. Returns true = completed, false = timed out. +const scanReady = finder.waitForScan(30_000); +if (!scanReady.ok || !scanReady.value) { + console.error("fff: initial scan did not finish in 30s — exiting"); + process.exit(1); +} +console.log(`fff init+scan: ${fffInit.ms.toFixed(2)}ms\n`); + +const fffSamples: Sample[] = []; +for (let i = 0; i < iterations; i++) { + const r = await bench(() => { + const out = finder.glob(pattern, { pageSize: 100 }); + if (!out.ok) throw new Error(out.error); + return out.value; + }); + fffSamples.push({ ms: r.ms, count: r.result.items.length }); +} + +// --------------------------------------------------------------------------- +// Bun.Glob — sync iterator, returns relative paths. 
+// --------------------------------------------------------------------------- +const bunSamples: Sample[] = []; +for (let i = 0; i < iterations; i++) { + const r = await bench(() => { + const g = new BunGlob(pattern); + let count = 0; + for (const _ of g.scanSync({ cwd: dir })) count++; + return count; + }); + bunSamples.push({ ms: r.ms, count: r.result }); +} + +// --------------------------------------------------------------------------- +// npm glob — async, returns absolute or relative paths depending on opts. +// --------------------------------------------------------------------------- +const npmSamples: Sample[] = []; +if (npmGlob) { + for (let i = 0; i < iterations; i++) { + const r = await bench(() => npmGlob!(pattern, { cwd: dir })); + npmSamples.push({ ms: r.ms, count: r.result.length }); + } +} + +console.log("results:"); +summarize("fff.glob", fffSamples); +summarize("Bun.Glob", bunSamples); +summarize("npm glob", npmSamples); + +// Sanity: counts should be in the same ballpark. They won't match exactly +// because indexing rules differ (fff respects gitignore + skips binaries by +// default; Bun.Glob and npm glob do not). +const counts = { + fff: fffSamples[0]?.count ?? 0, + bun: bunSamples[0]?.count ?? 0, + npm: npmSamples[0]?.count ?? 0, +}; +console.log( + `\nNote: fff respects gitignore + skips binaries; Bun.Glob and npm glob walk the raw filesystem. 
Count differences are expected.`, +); +console.log( + `raw counts: fff=${counts.fff} bun=${counts.bun} npm=${counts.npm}`, +); + +finder.destroy(); diff --git a/packages/fff-bun/src/ffi.ts b/packages/fff-bun/src/ffi.ts index cf76ce5f..fe3cf0b5 100644 --- a/packages/fff-bun/src/ffi.ts +++ b/packages/fff-bun/src/ffi.ts @@ -62,6 +62,10 @@ const ffiDefinition = { ], returns: FFIType.ptr, }, + fff_create_instance_with: { + args: [FFIType.ptr], // *const FffCreateOptions + returns: FFIType.ptr, + }, fff_destroy: { args: [FFIType.ptr], returns: FFIType.void, @@ -82,6 +86,19 @@ const ffiDefinition = { returns: FFIType.ptr, }, + // Glob-only search (bypasses query parser) + fff_glob: { + args: [ + FFIType.ptr, // handle + FFIType.cstring, // pattern + FFIType.cstring, // current_file + FFIType.u32, // max_threads + FFIType.u32, // page_index + FFIType.u32, // page_size + ], + returns: FFIType.ptr, + }, + // Directory search fff_search_directories: { args: [ @@ -332,10 +349,25 @@ const RES_ERROR = 8; // *mut c_char (8) const RES_HANDLE = 16; // *mut c_void (8) const RES_INT_VALUE = 24; // i64 (8) -/** - * Read the FffResult envelope: check success, extract payload, free envelope. - * On error returns a Result. On success returns the raw handle pointer and int_value. 
- */ +// MUST match `crates/fff-c/src/ffi_types.rs::FffCreateOptions` +const FFF_CREATE_OPTIONS_VERSION = 1; +const FFF_CREATE_OPTIONS_SIZE = 88; +const FCO_VERSION = 0; +const FCO_BASE_PATH = 8; +const FCO_FRECENCY_DB_PATH = 16; +const FCO_HISTORY_DB_PATH = 24; +const FCO_ENABLE_MMAP_CACHE = 32; +const FCO_ENABLE_CONTENT_INDEXING = 33; +const FCO_WATCH = 34; +const FCO_AI_MODE = 35; +const FCO_LOG_FILE_PATH = 40; +const FCO_LOG_LEVEL = 48; +const FCO_CACHE_BUDGET_MAX_FILES = 56; +const FCO_CACHE_BUDGET_MAX_BYTES = 64; +const FCO_CACHE_BUDGET_MAX_FILE_SIZE = 72; +const FCO_ENABLE_FS_ROOT_SCANNING = 80; +const FCO_ENABLE_HOME_DIR_SCANNING = 81; + function readResultEnvelope( resultPtr: Pointer | null, ): { success: true; handlePtr: number; intValue: number } | Result { @@ -420,12 +452,23 @@ export type NativeHandle = Pointer; /** * Create a new file finder instance. + * + * Hand-encodes a [`FffCreateOptions`] struct (88 bytes, locked offsets — see + * `crates/fff-c/src/ffi_types.rs::options_layout_tests`) into a Buffer and + * passes its pointer to `fff_create_instance_with`. Inner cstring addresses + * come from Bun's native `ptr(buffer)` primitive — no round-trip helpers, + * no struct support gaps. + * + * Adding new options later means: (1) appending the field to + * `FffCreateOptions` in Rust, (2) bumping `FFF_CREATE_OPTIONS_VERSION`, + * (3) extending `FFF_CREATE_OPTIONS_SIZE` + offsets here. The C entry point + * never changes. 
*/ export function ffiCreate( basePath: string, frecencyDbPath: string, historyDbPath: string, - useUnsafeNoLock: boolean, + _useUnsafeNoLock: boolean, enableMmapCache: boolean, enableContentIndexing: boolean, watch: boolean, @@ -435,48 +478,77 @@ export function ffiCreate( cacheBudgetMaxFiles: bigint, cacheBudgetMaxBytes: bigint, cacheBudgetMaxFileSize: bigint, + enableFsRootScanning: boolean, + enableHomeDirScanning: boolean, ): Result { const library = loadLibrary(); - const resultPtr = library.symbols.fff_create_instance2( - ptr(encodeString(basePath)), - ptr(encodeString(frecencyDbPath)), - ptr(encodeString(historyDbPath)), - useUnsafeNoLock, - enableMmapCache, - enableContentIndexing, - watch, - aiMode, - ptr(encodeString(logFilePath)), - ptr(encodeString(logLevel)), - cacheBudgetMaxFiles, - cacheBudgetMaxBytes, - cacheBudgetMaxFileSize, - ); + + // Keep cstring buffers alive across the FFI call. Bun's `ptr()` returns + // the underlying memory address of each Buffer — no round-trips. + const basePathCStr = encodeCStringBuf(basePath); + const frecencyCStr = encodeCStringBuf(frecencyDbPath); + const historyCStr = encodeCStringBuf(historyDbPath); + const logFileCStr = encodeCStringBuf(logFilePath); + const logLevelCStr = encodeCStringBuf(logLevel); + + const opts = Buffer.alloc(FFF_CREATE_OPTIONS_SIZE); + opts.writeUInt32LE(FFF_CREATE_OPTIONS_VERSION, FCO_VERSION); + writePtrLE(opts, FCO_BASE_PATH, basePathCStr); + writePtrLE(opts, FCO_FRECENCY_DB_PATH, frecencyCStr); + writePtrLE(opts, FCO_HISTORY_DB_PATH, historyCStr); + opts.writeUInt8(enableMmapCache ? 1 : 0, FCO_ENABLE_MMAP_CACHE); + opts.writeUInt8(enableContentIndexing ? 1 : 0, FCO_ENABLE_CONTENT_INDEXING); + opts.writeUInt8(watch ? 1 : 0, FCO_WATCH); + opts.writeUInt8(aiMode ? 
1 : 0, FCO_AI_MODE); + writePtrLE(opts, FCO_LOG_FILE_PATH, logFileCStr); + writePtrLE(opts, FCO_LOG_LEVEL, logLevelCStr); + opts.writeBigUInt64LE(cacheBudgetMaxFiles, FCO_CACHE_BUDGET_MAX_FILES); + opts.writeBigUInt64LE(cacheBudgetMaxBytes, FCO_CACHE_BUDGET_MAX_BYTES); + opts.writeBigUInt64LE(cacheBudgetMaxFileSize, FCO_CACHE_BUDGET_MAX_FILE_SIZE); + opts.writeUInt8(enableFsRootScanning ? 1 : 0, FCO_ENABLE_FS_ROOT_SCANNING); + opts.writeUInt8(enableHomeDirScanning ? 1 : 0, FCO_ENABLE_HOME_DIR_SCANNING); + + const resultPtr = library.symbols.fff_create_instance_with(ptr(opts)); if (resultPtr === null) { return err("FFI returned null pointer"); } const success = read.u8(resultPtr, RES_SUCCESS) !== 0; - const errorPtr = read.ptr(resultPtr, RES_ERROR); - const handlePtr = read.ptr(resultPtr, RES_HANDLE); if (success) { + const handlePtr = read.ptr(resultPtr, RES_HANDLE); const handle = handlePtr as unknown as Pointer; library.symbols.fff_free_result(resultPtr); if (!handle || handle === (0 as unknown as Pointer)) { - return err("fff_create_instance returned null handle"); + return err("fff_create_instance_with returned null handle"); } return { ok: true, value: handle }; } else { + const errorPtr = read.ptr(resultPtr, RES_ERROR); const errorMsg = readCString(errorPtr) || "Unknown error"; library.symbols.fff_free_result(resultPtr); return err(errorMsg); } } +/** NUL-terminated UTF-8 buffer for `s`, or `null` for empty input. */ +function encodeCStringBuf(s: string | null | undefined): Buffer | null { + if (!s) return null; + return Buffer.from(s + "\0", "utf-8"); +} + +/** Write Bun's native pointer-to-buffer address into the options buffer. */ +function writePtrLE(buf: Buffer, offset: number, target: Buffer | null): void { + if (target == null) { + buf.writeBigUInt64LE(0n, offset); + return; + } + buf.writeBigUInt64LE(BigInt(ptr(target) as unknown as number), offset); +} + /** * Destroy and clean up an instance. 
*/ @@ -1017,6 +1089,30 @@ export function ffiSearch( return parseSearchResult(resultPtr); } +/** + * Glob-only search. Bypasses the regular query parser, applies the pattern + * as a single `Constraint::Glob`, ranks by frecency, paginates. + */ +export function ffiGlob( + handle: NativeHandle, + pattern: string, + currentFile: string, + maxThreads: number, + pageIndex: number, + pageSize: number, +): Result { + const library = loadLibrary(); + const resultPtr = library.symbols.fff_glob( + handle, + ptr(encodeString(pattern)), + ptr(encodeString(currentFile)), + maxThreads, + pageIndex, + pageSize, + ); + return parseSearchResult(resultPtr); +} + /** * Perform fuzzy directory search. */ diff --git a/packages/fff-bun/src/finder.ts b/packages/fff-bun/src/finder.ts index b881ea92..22f11e5f 100644 --- a/packages/fff-bun/src/finder.ts +++ b/packages/fff-bun/src/finder.ts @@ -15,6 +15,7 @@ import { ffiGetBasePath, ffiGetHistoricalQuery, ffiGetScanProgress, + ffiGlob, ffiHealthCheck, ffiIsScanning, ffiLiveGrep, @@ -35,6 +36,7 @@ import { import type { DirSearchOptions, DirSearchResult, + GlobOptions, GrepOptions, GrepResult, HealthCheck, @@ -122,6 +124,8 @@ export class FileFinder { BigInt(options.cacheBudgetMaxFiles ?? 0), BigInt(options.cacheBudgetMaxBytes ?? 0), BigInt(options.cacheBudgetMaxFileSize ?? 0), + options.enableFsRootScanning ?? false, + options.enableHomeDirScanning ?? false, ); if (!result.ok) { @@ -202,6 +206,30 @@ export class FileFinder { ); } + /** + * Glob-only search. + * + * The pattern is applied as a single-pass, SIMD-optimized prefilter + * without any fuzzy matching involved. Faster and 100% compatible with npm `glob`. 
+ * + * @param pattern - Glob pattern (required, non-empty) + * @param options - Glob search options (pagination, max threads, current file) + * @returns Search results with files matching the glob + */ + glob(pattern: string, options?: GlobOptions): Result { + const guard = this.ensureAlive(); + if (!guard.ok) return guard; + + return ffiGlob( + guard.value, + pattern, + options?.currentFile ?? "", + options?.maxThreads ?? 0, + options?.pageIndex ?? 0, + options?.pageSize ?? 0, + ); + } + /** * Search for directories matching the query. * diff --git a/packages/fff-bun/src/index.test.ts b/packages/fff-bun/src/index.test.ts index 87e048f9..e86c6b8d 100644 --- a/packages/fff-bun/src/index.test.ts +++ b/packages/fff-bun/src/index.test.ts @@ -164,6 +164,85 @@ describe("FileFinder - Full Lifecycle", () => { } }); + test("glob filters by extension via raw pattern", () => { + const result = finder.glob("**/*.ts", { pageSize: 50 }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.value.items.length).toBeGreaterThan(0); + for (const item of result.value.items) { + expect(item.relativePath.endsWith(".ts")).toBe(true); + } + } + }); + + test("glob returns empty result for non-matching pattern", () => { + const result = finder.glob("**/this-extension-does-not-exist-anywhere.zzz"); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.value.items.length).toBe(0); + } + }); + + test("glob rejects empty pattern", () => { + const result = finder.glob(""); + expect(result.ok).toBe(false); + }); + + test("glob respects pageSize", () => { + const result = finder.glob("**/*.ts", { pageSize: 2 }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.value.items.length).toBeLessThanOrEqual(2); + } + }); + + test("glob pageIndex offsets results", () => { + // pageIndex is a raw item offset (not a page-count multiplier). Verify + // by skipping the first item and checking the second result begins + // where page0[1] left off. 
+ const page0 = finder.glob("**/*.ts", { pageSize: 5, pageIndex: 0 }); + const page1 = finder.glob("**/*.ts", { pageSize: 5, pageIndex: 1 }); + expect(page0.ok).toBe(true); + expect(page1.ok).toBe(true); + if (page0.ok && page1.ok && page0.value.items.length > 1 && page1.value.items.length > 0) { + expect(page1.value.items[0]!.relativePath).toBe(page0.value.items[1]!.relativePath); + } + }); + + test("glob directory-prefix pattern matches only that subtree", () => { + const result = finder.glob("src/**/*.ts", { pageSize: 100 }); + expect(result.ok).toBe(true); + if (result.ok) { + for (const item of result.value.items) { + expect(item.relativePath.startsWith("src/")).toBe(true); + expect(item.relativePath.endsWith(".ts")).toBe(true); + } + } + }); + + test("glob result items carry expected fields", () => { + const result = finder.glob("**/*.ts", { pageSize: 1 }); + expect(result.ok).toBe(true); + if (result.ok && result.value.items.length > 0) { + const item = result.value.items[0]; + expect(typeof item.relativePath).toBe("string"); + expect(typeof item.fileName).toBe("string"); + expect(item.relativePath.length).toBeGreaterThan(0); + } + }); + + test("glob literal extension pattern (no leading **) still filters", () => { + const result = finder.glob("*.ts", { pageSize: 100 }); + expect(result.ok).toBe(true); + // Don't assert non-zero — depends on whether top-level .ts files exist. + // Just assert all returned items match. 
+ if (result.ok) { + for (const item of result.value.items) { + expect(item.relativePath.endsWith(".ts")).toBe(true); + } + } + }); + test("grep plain text returns matching lines", () => { const result = finder.grep("fff-core", { mode: "plain", diff --git a/packages/fff-bun/src/types.ts b/packages/fff-bun/src/types.ts index 05ad615a..0cc48967 100644 --- a/packages/fff-bun/src/types.ts +++ b/packages/fff-bun/src/types.ts @@ -27,7 +27,10 @@ export interface InitOptions { frecencyDbPath?: string; /** Path to query history database (optional, omit to skip query tracker initialization) */ historyDbPath?: string; - /** Use unsafe no-lock mode for databases (optional, defaults to false) */ + /** + * @deprecated No-op. The no-lock LMDB flags showed no measurable win under + * realistic contention and are now ignored. Kept for source-compat. + */ useUnsafeNoLock?: boolean; /** * Disable mmap cache warmup after the initial scan. When mmap cache is @@ -70,6 +73,18 @@ export interface InitOptions { cacheBudgetMaxBytes?: number; /** Override for the per-file byte cap in the content cache. */ cacheBudgetMaxFileSize?: number; + /** + * Allow indexing the filesystem root (`/`). Off by default — root is + * rarely the intended target and floods the watcher with churn-prone + * events. Setting this true is opt-in and the caller is responsible for + * the resulting fs-event volume. + */ + enableFsRootScanning?: boolean; + /** + * Allow indexing the user's home directory. Same trade-off as + * `enableFsRootScanning`. + */ + enableHomeDirScanning?: boolean; } /** @@ -90,6 +105,23 @@ export interface SearchOptions { pageSize?: number; } +/** + * Options for `glob`, the constraint-only search. + * + * The pattern is applied as a single-pass, SIMD-optimized prefilter + * without any fuzzy matching involved. Faster and 100% compatible with npm `glob`. + */ +export interface GlobOptions { + /** Maximum threads for parallel filtering (0 = auto). 
*/ + maxThreads?: number; + /** Current file path (for deprioritization in results). */ + currentFile?: string; + /** Page index for pagination (default: 0). */ + pageIndex?: number; + /** Page size for pagination (default: 100). */ + pageSize?: number; +} + /** * A file item in search results */ diff --git a/packages/fff-node/src/ffi.ts b/packages/fff-node/src/ffi.ts index 23110c00..e1861106 100644 --- a/packages/fff-node/src/ffi.ts +++ b/packages/fff-node/src/ffi.ts @@ -62,6 +62,27 @@ import { createGrepCursor, err } from "./types.js"; const LIBRARY_KEY = "fff_c"; +const FFF_CREATE_OPTIONS_STRUCT = { + version: DataType.U32, + base_path: DataType.String, + frecency_db_path: DataType.String, + history_db_path: DataType.String, + enable_mmap_cache: DataType.U8, + enable_content_indexing: DataType.U8, + watch: DataType.U8, + ai_mode: DataType.U8, + log_file_path: DataType.String, + log_level: DataType.String, + cache_budget_max_files: DataType.U64, + cache_budget_max_bytes: DataType.U64, + cache_budget_max_file_size: DataType.U64, + enable_fs_root_scanning: DataType.U8, + enable_home_dir_scanning: DataType.U8, +}; + +// ALWAYS KEEP IN SYNC WITH fff.h +const FFF_CREATE_OPTIONS_VERSION = 1; + /** Grep mode constants matching the C API (u8). 
*/ const GREP_MODE_PLAIN = 0; const GREP_MODE_REGEX = 1; @@ -216,7 +237,11 @@ function readResultEnvelope( paramsValue: unknown[], ): { rawPtr: JsExternal; struct: FffResultRaw } | Result { loadLibrary(); - const { rawPtr, struct: structData } = callRaw(funcName, paramsType, paramsValue); + const { rawPtr, struct: structData } = callRaw( + funcName, + paramsType, + paramsValue, + ); if (structData.success === 0) { const errorStr = readCString(structData.error); @@ -294,7 +319,8 @@ function callJsonResult( if (isNullPointer(handlePtr)) return { ok: true, value: undefined as T }; const jsonStr = readCString(handlePtr); freeString(handlePtr); - if (jsonStr === null || jsonStr === "") return { ok: true, value: undefined as T }; + if (jsonStr === null || jsonStr === "") + return { ok: true, value: undefined as T }; try { return { ok: true, value: snakeToCamel(JSON.parse(jsonStr)) as T }; } catch { @@ -322,14 +348,11 @@ function freeString(ptr: JsExternal): void { */ export type NativeHandle = JsExternal; -/** - * Create a new file finder instance. 
- */ export function ffiCreate( basePath: string, frecencyDbPath: string, historyDbPath: string, - useUnsafeNoLock: boolean, + _useUnsafeNoLock: boolean, enableMmapCache: boolean, enableContentIndexing: boolean, watch: boolean, @@ -339,42 +362,42 @@ export function ffiCreate( cacheBudgetMaxFiles: number, cacheBudgetMaxBytes: number, cacheBudgetMaxFileSize: number, + enableFsRootScanning: boolean, + enableHomeDirScanning: boolean, ): Result { loadLibrary(); - const { rawPtr, struct: structData } = callRaw( - "fff_create_instance2", - [ - DataType.String, // base_path - DataType.String, // frecency_db_path - DataType.String, // history_db_path - DataType.Boolean, // use_unsafe_no_lock - DataType.Boolean, // enable_mmap_cache - DataType.Boolean, // enable_content_indexing - DataType.Boolean, // watch - DataType.Boolean, // ai_mode - DataType.String, // log_file_path - DataType.String, // log_level - DataType.U64, // cache_budget_max_files - DataType.U64, // cache_budget_max_bytes - DataType.U64, // cache_budget_max_file_size - ], - [ - basePath, - frecencyDbPath, - historyDbPath, - useUnsafeNoLock, - enableMmapCache, - enableContentIndexing, - watch, - aiMode, - logFilePath, - logLevel, - cacheBudgetMaxFiles, - cacheBudgetMaxBytes, - cacheBudgetMaxFileSize, - ], - ); + const optsValue = { + version: FFF_CREATE_OPTIONS_VERSION, + base_path: basePath, + frecency_db_path: frecencyDbPath, + history_db_path: historyDbPath, + enable_mmap_cache: enableMmapCache ? 1 : 0, + enable_content_indexing: enableContentIndexing ? 1 : 0, + watch: watch ? 1 : 0, + ai_mode: aiMode ? 1 : 0, + log_file_path: logFilePath, + log_level: logLevel, + cache_budget_max_files: cacheBudgetMaxFiles, + cache_budget_max_bytes: cacheBudgetMaxBytes, + cache_budget_max_file_size: cacheBudgetMaxFileSize, + enable_fs_root_scanning: enableFsRootScanning ? 1 : 0, + enable_home_dir_scanning: enableHomeDirScanning ? 
1 : 0, + }; + + const rawPtr = load({ + library: LIBRARY_KEY, + funcName: "fff_create_instance_with", + retType: DataType.External, + paramsType: [FFF_CREATE_OPTIONS_STRUCT], + paramsValue: [optsValue], + freeResultMemory: false, + }) as JsExternal; + + const [structData] = restorePointer({ + retType: [FFF_RESULT_STRUCT], + paramsValue: wrapPointer([rawPtr]), + }) as unknown as [FffResultRaw]; const success = structData.success !== 0; @@ -382,7 +405,7 @@ export function ffiCreate( if (success) { const handle = structData.handle; if (isNullPointer(handle)) { - return err("fff_create_instance2 returned null handle"); + return err("fff_create_instance_with returned null handle"); } return { ok: true, value: handle }; } else { @@ -816,10 +839,16 @@ function readGrepMatchFromRaw(raw: FffGrepMatchRaw): GrepMatch { match.fuzzyScore = raw.fuzzy_score; } if (raw.context_before_count > 0) { - match.contextBefore = readCStringArray(raw.context_before, raw.context_before_count); + match.contextBefore = readCStringArray( + raw.context_before, + raw.context_before_count, + ); } if (raw.context_after_count > 0) { - match.contextAfter = readCStringArray(raw.context_after, raw.context_after_count); + match.contextAfter = readCStringArray( + raw.context_after, + raw.context_after_count, + ); } if (raw.is_definition !== 0) { match.isDefinition = true; @@ -888,7 +917,8 @@ function parseGrepResult(rawPtr: JsExternal): Result { totalFilesSearched: gr.total_files_searched, totalFiles: gr.total_files, filteredFileCount: gr.filtered_file_count, - nextCursor: gr.next_file_offset > 0 ? createGrepCursor(gr.next_file_offset) : null, + nextCursor: + gr.next_file_offset > 0 ? 
createGrepCursor(gr.next_file_offset) : null, }; if (regexFallbackError) { grepResult.regexFallbackError = regexFallbackError; @@ -937,7 +967,11 @@ function parseSearchResult(rawPtr: JsExternal): Result { if (sr.location_tag === 1) { location = { type: "line", line: sr.location_line }; } else if (sr.location_tag === 2) { - location = { type: "position", line: sr.location_line, col: sr.location_col }; + location = { + type: "position", + line: sr.location_line, + col: sr.location_col, + }; } else if (sr.location_tag === 3) { location = { type: "range", @@ -1108,7 +1142,11 @@ function parseMixedSearchResult(rawPtr: JsExternal): Result { if (sr.location_tag === 1) { location = { type: "line", line: sr.location_line }; } else if (sr.location_tag === 2) { - location = { type: "position", line: sr.location_line, col: sr.location_col }; + location = { + type: "position", + line: sr.location_line, + col: sr.location_col, + }; } else if (sr.location_tag === 3) { location = { type: "range", @@ -1206,6 +1244,46 @@ export function ffiSearch( return parseSearchResult(rawPtr); } +/** + * Glob-only search. Bypasses the regular query parser, applies the pattern + * as a single `Constraint::Glob`, ranks by frecency, paginates. + */ +export function ffiGlob( + handle: NativeHandle, + pattern: string, + currentFile: string, + maxThreads: number, + pageIndex: number, + pageSize: number, +): Result { + loadLibrary(); + + const rawPtr = load({ + library: LIBRARY_KEY, + funcName: "fff_glob", + retType: DataType.External, + paramsType: [ + DataType.External, // handle + DataType.String, // pattern + DataType.String, // current_file + DataType.U32, // max_threads + DataType.U32, // page_index + DataType.U32, // page_size + ], + paramsValue: [ + handle, + pattern, + currentFile, + maxThreads, + pageIndex, + pageSize, + ], + freeResultMemory: false, + }) as JsExternal; + + return parseSearchResult(rawPtr); +} + /** * Perform fuzzy directory search. 
*/ @@ -1231,7 +1309,14 @@ export function ffiSearchDirectories( DataType.U32, // page_index DataType.U32, // page_size ], - paramsValue: [handle, query, currentFile ?? "", maxThreads, pageIndex, pageSize], + paramsValue: [ + handle, + query, + currentFile ?? "", + maxThreads, + pageIndex, + pageSize, + ], freeResultMemory: false, }) as JsExternal; @@ -1443,7 +1528,11 @@ export function ffiGetScanProgress( handle: NativeHandle, ): Result<{ scannedFilesCount: number; isScanning: boolean }> { loadLibrary(); - const res = readResultEnvelope("fff_get_scan_progress", [DataType.External], [handle]); + const res = readResultEnvelope( + "fff_get_scan_progress", + [DataType.External], + [handle], + ); if ("ok" in res) return res; const handlePtr = res.struct.handle; @@ -1476,7 +1565,10 @@ export function ffiGetScanProgress( /** * Wait for a tree scan to complete. */ -export function ffiWaitForScan(handle: NativeHandle, timeoutMs: number): Result { +export function ffiWaitForScan( + handle: NativeHandle, + timeoutMs: number, +): Result { return callBoolResult( "fff_wait_for_scan", [DataType.External, DataType.U64], @@ -1487,7 +1579,10 @@ export function ffiWaitForScan(handle: NativeHandle, timeoutMs: number): Result< /** * Restart index in new path. 
*/ -export function ffiRestartIndex(handle: NativeHandle, newPath: string): Result { +export function ffiRestartIndex( + handle: NativeHandle, + newPath: string, +): Result { return callVoidResult( "fff_restart_index", [DataType.External, DataType.String], diff --git a/packages/fff-node/src/finder.ts b/packages/fff-node/src/finder.ts index 272656b3..e8622bcc 100644 --- a/packages/fff-node/src/finder.ts +++ b/packages/fff-node/src/finder.ts @@ -15,6 +15,7 @@ import { ffiGetBasePath, ffiGetHistoricalQuery, ffiGetScanProgress, + ffiGlob, ffiHealthCheck, ffiIsScanning, ffiLiveGrep, @@ -33,6 +34,7 @@ import { import type { DirSearchOptions, DirSearchResult, + GlobOptions, GrepOptions, GrepResult, HealthCheck, @@ -121,6 +123,8 @@ export class FileFinder { options.cacheBudgetMaxFiles ?? 0, options.cacheBudgetMaxBytes ?? 0, options.cacheBudgetMaxFileSize ?? 0, + options.enableFsRootScanning ?? false, + options.enableHomeDirScanning ?? false, ); if (!result.ok) { @@ -201,6 +205,40 @@ export class FileFinder { ); } + /** + * Filters files using a glob wildcard expression. + * + * The pattern is applied as a single-pass, SIMD-optimized prefilter + * without any fuzzy matching involved. Faster and 100% compatible with npm `glob`. + * + * @param pattern - Glob pattern (required, non-empty) + * @param options - Glob search options (pagination, max threads, current file) + * @returns Search results with files matching the glob + * + * @example + * ```typescript + * const result = finder.glob("**\/*.rs", { pageSize: 100 }); + * if (result.ok) { + * for (const item of result.value.items) { + * console.log(item.relativePath); + * } + * } + * ``` + */ + glob(pattern: string, options?: GlobOptions): Result { + const guard = this.ensureAlive(); + if (!guard.ok) return guard; + + return ffiGlob( + guard.value, + pattern, + options?.currentFile ?? "", + options?.maxThreads ?? 0, + options?.pageIndex ?? 0, + options?.pageSize ?? 0, + ); + } + /** * Search for directories matching the query.
* diff --git a/packages/fff-node/src/types.ts b/packages/fff-node/src/types.ts index a409a0f1..c96f1c4e 100644 --- a/packages/fff-node/src/types.ts +++ b/packages/fff-node/src/types.ts @@ -27,7 +27,10 @@ export interface InitOptions { frecencyDbPath?: string; /** Path to query history database (optional, omit to skip query tracker initialization) */ historyDbPath?: string; - /** Use unsafe no-lock mode for databases (optional, defaults to false) */ + /** + * @deprecated No-op. The no-lock LMDB flags showed no measurable win under + * realistic contention and are now ignored. Kept for source-compat. + */ useUnsafeNoLock?: boolean; /** * Disable mmap cache warmup after the initial scan. When mmap cache is @@ -71,6 +74,18 @@ export interface InitOptions { cacheBudgetMaxBytes?: number; /** Override for the per-file byte cap in the content cache. */ cacheBudgetMaxFileSize?: number; + /** + * Allow indexing the filesystem root (`/`). Off by default — root is + * rarely the intended target and floods the watcher with churn-prone + * events. Setting this true is opt-in and the caller is responsible for + * the resulting fs-event volume. + */ + enableFsRootScanning?: boolean; + /** + * Allow indexing the user's home directory. Same trade-off as + * `enableFsRootScanning`. + */ + enableHomeDirScanning?: boolean; } /** @@ -91,6 +106,23 @@ export interface SearchOptions { pageSize?: number; } +/** + * Options for `glob`, the constraint-only search. + * + * The pattern is applied as a single-pass, SIMD-optimized prefilter + * without any fuzzy matching involved. Faster and 100% compatible with npm `glob`. + */ +export interface GlobOptions { + /** Maximum threads for parallel filtering (0 = auto). */ + maxThreads?: number; + /** Current file path (for deprioritization in results). */ + currentFile?: string; + /** Page index for pagination (default: 0). */ + pageIndex?: number; + /** Page size for pagination (default: 100).
*/ + pageSize?: number; +} + /** * A file item in search results */ diff --git a/packages/fff-node/test/e2e.mjs b/packages/fff-node/test/e2e.mjs index 309701ab..28c08091 100644 --- a/packages/fff-node/test/e2e.mjs +++ b/packages/fff-node/test/e2e.mjs @@ -130,6 +130,34 @@ describe("fff-node", { concurrency: 1 }, () => { }); }); + describe("glob", { concurrency: 1 }, () => { + it("filters by extension via raw glob pattern", () => { + const r = finder.glob("**/*.rs", { pageSize: 50 }); + assert.ok(r.ok, `glob failed: ${!r.ok ? r.error : ""}`); + assert.ok(r.value.items.length > 0, "expected at least one .rs file"); + for (const item of r.value.items) { + assert.ok(item.relativePath.endsWith(".rs"), `unexpected file: ${item.relativePath}`); + } + }); + + it("returns empty result for non-matching pattern", () => { + const r = finder.glob("**/this-extension-does-not-exist-anywhere.zzz"); + assert.ok(r.ok); + assert.equal(r.value.items.length, 0); + }); + + it("rejects empty pattern", () => { + const r = finder.glob(""); + assert.equal(r.ok, false); + }); + + it("respects pageSize", () => { + const r = finder.glob("**/*.rs", { pageSize: 3 }); + assert.ok(r.ok); + assert.ok(r.value.items.length <= 3); + }); + }); + describe("grep", { concurrency: 1 }, () => { it("finds FffResult in Rust sources", () => { // Constrain to .rs files so the assertion doesn't depend on result ordering @@ -201,7 +229,7 @@ describe("fff-node", { concurrency: 1 }, () => { it("decodes before/after context lines", () => { const r = finder.grep( - "match.contextBefore = readCStringArray(raw.context_before, raw.context_before_count);", + "LOLLOWOIEJIWOIUOIWUIWUIOUWE", // the random text visible here { mode: "plain", beforeContext: 1, @@ -212,18 +240,18 @@ describe("fff-node", { concurrency: 1 }, () => { assert.ok(r.ok, `grep with context failed: ${!r.ok ? 
r.error : ""}`); const match = r.value.items.find( - (m) => normalizePath(m.relativePath) === "packages/fff-node/src/ffi.ts", + (m) => normalizePath(m.relativePath) === "packages/fff-node/test/e2e.mjs", ); assert.ok( match, - `expected a match in packages/fff-node/src/ffi.ts, got: ${r.value.items + `expected a single match in the codebase, got: ${r.value.items .map((m) => normalizePath(m.relativePath)) .join(", ")}`, ); assert.deepEqual(match.contextBefore, [ - " if (raw.context_before_count > 0) {", + " const r = finder.grep(", ]); - assert.deepEqual(match.contextAfter, [" }"]); + assert.deepEqual(match.contextAfter, [" {"]); }); }); diff --git a/tests/programmatic_search_spec.lua b/tests/programmatic_search_spec.lua index b1a75ae5..ff2b58a6 100644 --- a/tests/programmatic_search_spec.lua +++ b/tests/programmatic_search_spec.lua @@ -204,6 +204,11 @@ describe('programmatic search APIs', function() fd:write('-- ' .. marker .. '\n') fd:close() + -- On Windows the metadata cache is updated lazily: if we programmatically start searching + -- the new directory right after we update it, we can sometimes hit a race window, especially on CI. + -- This is fine in practice because searching RIGHT AFTER mkdir is not a real use case for fff. + if vim.fn.has('win32') == 1 then vim.wait(250, function() return false end) end + -- Marker must not exist anywhere in the primary fff.nvim tree. local before = fff.content_search(marker) assert.are.equal(0, #before.items, 'marker leaked into primary fff.nvim tree')