diff --git a/Cargo.lock b/Cargo.lock index 54548a61..a68c345e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1152,6 +1152,7 @@ version = "0.7.0" dependencies = [ "numpy", "pyo3", + "pyo3-build-config", "turbovec", ] diff --git a/turbovec-python/Cargo.toml b/turbovec-python/Cargo.toml index 9cc4a980..8cfc72cd 100644 --- a/turbovec-python/Cargo.toml +++ b/turbovec-python/Cargo.toml @@ -12,3 +12,6 @@ crate-type = ["cdylib"] turbovec-core = { package = "turbovec", path = "../turbovec" } pyo3 = { version = "0.27.0", features = ["extension-module", "abi3-py39"] } numpy = "0.27.0" + +[build-dependencies] +pyo3-build-config = "0.27.0" diff --git a/turbovec-python/build.rs b/turbovec-python/build.rs new file mode 100644 index 00000000..88c0fea0 --- /dev/null +++ b/turbovec-python/build.rs @@ -0,0 +1,10 @@ +fn main() { + // Emit the platform-correct linker arguments for a Python extension + // module. On macOS this passes `-undefined dynamic_lookup` so symbols + // from the Python interpreter (e.g. `Py_True`) resolve at load time + // instead of failing the link step. Without it, a plain `cargo build` + // on macOS fails with "symbol(s) not found for architecture arm64" + // (issue #92). Building via maturin already injects these args; this + // makes a bare `cargo build` work too. + pyo3_build_config::add_extension_module_link_args(); +} diff --git a/turbovec/src/search.rs b/turbovec/src/search.rs index 4fda9433..078580ee 100644 --- a/turbovec/src/search.rs +++ b/turbovec/src/search.rs @@ -61,7 +61,6 @@ unsafe fn score_4bit_block_neon( for batch in 0..n_batches { let g_start = batch * FLUSH_EVERY; let g_end = (g_start + FLUSH_EVERY).min(n_byte_groups); - let n_groups = g_end - g_start; let mut accum = [vdupq_n_u16(0); 4]; @@ -1097,7 +1096,6 @@ unsafe fn score_4query_block_neon( for batch in 0..n_batches { let g_start = batch * FLUSH_EVERY; let g_end = (g_start + FLUSH_EVERY).min(n_byte_groups); - let n_groups = g_end - g_start; let mut acc: [[uint16x8_t; 4]; 4] = [[vdupq_n_u16(0); 4]; 4]; @@ -1343,6 +1341,10 @@ pub(crate) fn block_pair_has_allowed(mask: Option<&[u64]>, base_vec_pair: usize) /// VM / emulator that doesn't expose AVX2 to userspace). Without this /// fallback, pre-AVX2 x86_64 silently returned empty top-k results /// instead of falling back to a slower-but-correct kernel. +/// +/// Not compiled on aarch64, where the NEON kernel is always available and +/// this scalar path is never reached (it would warn as dead code). +#[cfg(not(target_arch = "aarch64"))] #[allow(clippy::too_many_arguments)] fn score_query_into_heap( qlut_uint8: &[u8],