diff --git a/.gitignore b/.gitignore
index c02a673aa93..ca2e6009681 100644
--- a/.gitignore
+++ b/.gitignore
@@ -72,6 +72,7 @@ cpp/include/tensorrt_llm/executor/version.h
 cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention/fmha_v2_cu/
 cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention/cubin/fmha_cubin.h
 .devcontainer/.env
+cpp/dependency_scan/scan_output/
 # User config files
 CMakeUserPresets.json
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index e013dbc17e2..86cfa00dba8 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -328,6 +328,13 @@ endif()

 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss ")

+# Generate dependency files (.d) to track all header dependencies. This creates
+# .d files alongside .o files listing all headers used.
+if(NOT WIN32)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -MD -MP")
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -MD -MP")
+endif()
+
 # note: cmake expr generation $ is a build time
 # evaluation so hard to debug at cmake time
 if(ENABLE_MULTI_DEVICE)
diff --git a/cpp/dependency_scan/README.md b/cpp/dependency_scan/README.md
new file mode 100644
index 00000000000..095236579d3
--- /dev/null
+++ b/cpp/dependency_scan/README.md
@@ -0,0 +1,345 @@
+# CPP Dependency Scanner
+
+Scans TensorRT-LLM build artifacts (headers, libraries, binaries) and maps them to source dependencies.
+A build artifact is any header file used in the build, and any linked static/dynamic library.
+
+## Quick Start
+
+```bash
+# Run scanner (scans ../build by default)
+python scan_build_artifacts.py
+
+# Output:
+# scan_output/known.yml - Mapped artifacts
+# scan_output/unknown.yml - Unmapped artifacts
+# scan_output/path_issues.yml - Non-existent paths
+```
+
+## Goals and Non-Goals
+
+### Goals
+
+This scanner is designed to:
+
+1. **Map Build Artifacts to Dependencies**
+   - Identify which source dependencies (container-origin, fetched, third-party) are used in the TensorRT-LLM C++ build
+   - Use tooling plus developer-provided pattern data to map build artifacts to canonical packages
+
+2. **Achieve Complete Coverage**
+   - Goal: 100% of build artifacts mapped to known dependencies
+   - Track unmapped artifacts in `unknown.yml` for iterative pattern refinement
+
+3. **Enable Iterative Development**
+   - Provide actionable output (`unknown.yml`) to guide pattern additions
+   - Support YAML-based pattern definitions for easy maintenance
+   - Validate patterns with schema checking
+
+### Non-Goals
+
+This scanner is **NOT** designed to:
+
+1. Identify source-integrated dependencies, i.e. third-party code copied directly into the TensorRT-LLM codebase.
+2. Identify pip-installed Python runtime dependencies.
+3. Be a one-size-fits-all solution that catches all dependencies.
+4. Enrich each dependency with license information or generate attributions.
+5. Track transitive dependencies that are invisible to CMake.
+6. Provide Windows support.
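+
+For context on where the scanned artifacts come from: the `-MD -MP` flags added to `cpp/CMakeLists.txt` in this change make the compiler emit a Make-format `.d` file next to each `.o`, listing every header that translation unit included (`-MP` additionally emits an empty phony rule per header so deleted headers do not break incremental builds). A purely illustrative depfile of the kind the scanner parses (hypothetical paths):
+
+```
+foo.cpp.o: ../../../tensorrt_llm/foo.cpp \
+  /usr/include/stdio.h \
+  /usr/local/cuda-12.9/include/cuda_runtime.h
+
+/usr/include/stdio.h:
+```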
+ +## Usage + +### Basic Usage + +```bash +# Scan with default settings +python scan_build_artifacts.py + +# Scan custom build directory +python scan_build_artifacts.py --build-dir /path/to/build + +# Scan with custom output directory +python scan_build_artifacts.py --output-dir /path/to/output + +# Validate YAML files +python scan_build_artifacts.py --validate +``` + +### Command-Line Arguments + +- `--build-dir`: Build directory to scan (default: `../build/`) +- `--output-dir`: Output directory for reports (default: `scan_output/`) +- `--metadata-dir`: Metadata directory containing YAML files (default: `./metadata/`) +- `--validate`: Validate YAML files without running scanner + +## Resolution Strategy + +1. **dpkg-query**: System packages via Debian package manager +2. **YAML patterns**: Non-dpkg packages (TensorRT, PyTorch, 3rdparty/ submodules, etc.) + +## Output Format + +### known.yml + +```yaml +summary: + total_artifacts: 6200 + mapped: 6200 + unmapped: 0 + coverage: 100.0% + unique_dependencies: 48 + +dependencies: + cuda-cudart: + - /usr/local/cuda-12.9/include/cuda_runtime.h + - /usr/local/cuda-12.9/include/cuda.h + + libc6: + - /usr/include/stdio.h + - -lpthread + - -ldl + + pytorch: + - /usr/local/lib/python3.12/dist-packages/torch/include/torch/torch.h + - -ltorch +``` + +### unknown.yml + +```yaml +summary: + count: 0 + action_required: Add patterns to YAML files in metadata/ for these artifacts +artifacts: [] +``` + +### path_issues.yml + +Reports artifacts whose resolved paths don't exist in the filesystem. This helps identify: +- Stale build artifacts from old builds +- Incorrectly resolved paths +- Optional headers that may not be present +- Temporary build files that were deleted + +**Note:** Library artifacts are excluded from this report since they don't have meaningful path resolution metadata. + +```yaml +summary: + count: 1042 + total_artifacts: 12238 + percentage: 8.5% + note: These header paths were resolved from .d files but do not exist in the filesystem (libraries excluded) + +non_existent_paths: +- resolved_path: /usr/local/lib/python3.12/dist-packages/torch/include/ATen/ops/_cudnn_attention_backward.h + type: header + source: /home/.../trtGptModelInflightBatching.cpp.o.d + d_file_path: /usr/local/lib/python3.12/dist-packages/torch/include/ATen/ops/_cudnn_attention_backward.h +``` + +**Field Descriptions:** +- `resolved_path`: The final canonicalized absolute path after resolution +- `type`: Artifact type (typically "header") +- `source`: The .d file where this path was found +- `d_file_path`: The original path as it appeared in the .d file (may be relative or absolute) + +**Common Causes:** +- **Optional headers**: PyTorch/CUDA headers that don't exist in all installations (e.g., `_cudnn_attention_*`) +- **Old CUDA paths**: References to previous CUDA versions no longer installed (e.g., `cuda-13.0` when only `cuda-12.9` exists) +- **Build artifacts**: Temporary generated files deleted after build completion +- **Stale .d files**: Dependency files from previous builds with different directory structures + +**Action:** Review the list and determine if these are expected (optional/temporary) or indicate path resolution issues. + +## Iterative Workflow + +1. **Run scanner** on build directory +2. **Review outputs**: + - `scan_output/unknown.yml` - unmapped artifacts requiring pattern additions + - `scan_output/path_issues.yml` - non-existent paths (may indicate stale builds or optional dependencies) +3. 
**Add patterns** to `metadata/*.yml` files for unknown artifacts +4. **Re-run** to verify improved coverage +5. **Repeat** until all artifacts mapped + +## Pattern Matching + +### Strategy Priority (High → Low) + +1. **Exact match**: `libcudart.so.12` → `cuda-cudart` +2. **Path alias**: `/build/pytorch/include/` → `pytorch` +3. **Generic inference**: `libfoobar.so` → `foobar` + +### Adding Patterns + +Edit existing or create new YAML file in `metadata/`: + +```yaml +name: cutlass +description: CUDA Templates for Linear Algebra Subroutines + +basename_matches: + - libcutlass.a + +linker_flags_matches: + - -lcutlass + +directory_matches: + - cutlass # Single: matches any /cutlass/ in path + - 3rdparty/cutlass # Multi: matches /3rdparty/cutlass/ sequence +``` + +#### Multi-Directory Patterns + +Directory patterns support both single and multi-directory matching: + +**Single Component:** +- `"pytorch"` matches any path containing `/pytorch/` +- Example: `/home/build/pytorch/include/torch.h` ✓ + +**Multi-Directory:** +- `"3rdparty/cutlass"` matches consecutive `/3rdparty/cutlass/` sequence +- `"foo/bar"` matches `/home/foo/bar/file.h` ✓ +- `"foo/bar"` does NOT match `/home/foobar/file.h` ✗ (no substring matching) + +**Matching Rules:** +- Exact component matching only (no substrings) +- `"oo/ba"` will NOT match `/foo/bar/` +- Rightmost match wins if pattern appears multiple times +- Leading/trailing slashes are ignored (`"/foo/bar/"` = `"foo/bar"`) + +See `metadata/_template.yml` and `metadata/README.md` for details. + +## YAML Dependencies + +Each dependency file contains: + +```yaml +name: pytorch +description: PyTorch machine learning framework +license: BSD-3-Clause +copyright: Copyright (c) PyTorch Contributors +homepage: https://pytorch.org/ +source: container + +basename_matches: + - libtorch.so + - libc10.so + +linker_flags_matches: + - -ltorch_python + +directory_matches: + - ATen + - c10 + - caffe2 + - torch + +aliases: + - torch +``` + +Multiple dependencies can be grouped in list format (see `metadata/base.yml`, `metadata/cuda.yml`). + +## Testing + +```bash +cd tests +python -m pytest test_scan_build_artifacts.py -v +``` + +## Troubleshooting + +**Low dpkg coverage** +- Running on non-Debian system +- YAML dependencies will handle more as fallback, with concrete patterns. + +**Many unknown artifacts** +1. Review `scan_output/unknown.yml` +2. Add patterns to `metadata/*.yml` +3. Run `--validate` to check syntax +4. Re-scan to verify + +**Wrong mappings** +- Check pattern priorities in YAML files +- More specific patterns should be listed first +- Make sure the patterns are very specific, to avoid false positives, or interfering with other patterns. + +**High percentage in path_issues.yml** +- If >20%, likely indicates stale build artifacts - run a clean rebuild +- If <10%, likely optional/temporary headers - expected behavior +- Check for references to uninstalled CUDA versions + +**Slow performance** +- Use `--build-dir` to target specific subdirectories +- Reduce build artifacts scope + +## Architecture + +``` +scan_build_artifacts.py (1,300 lines) +├── DpkgResolver - dpkg-query for system packages +├── ArtifactCollector - Parse D files, link files, wheels +├── PatternMatcher - 3-tier YAML pattern matching +└── OutputGenerator - Generate YAML reports +``` + +**Artifact Sources:** +- D files: CMake dependency files with headers. Dependency source header files. +- link.txt: Linker commands with libraries. Precompiled dependency artifacts. +- Wheels: Python binaries via readelf. 
Runtime dependency artifacts. + +**Special Parsing Behaviors:** + +1. **Malformed .d File Handling** (_parse_d_file method) + - Some CMake-generated .d files contain paths with trailing colons + - Example: `/usr/include/stdc-predef.h:` (should be `/usr/include/stdc-predef.h`) + - Parser strips trailing colons to handle these malformed entries + - Prevents duplicate artifacts and improves accuracy + +2. **CMakeFiles Linker Artifact Extraction** (_parse_link_file method) + - CMake generates special linker artifacts in CMakeFiles directories + - Pattern: `/path/CMakeFiles/foo.dir/-Wl,-soname,libtest.so.1` + - Parser extracts library name and converts to linker flag: `-ltest` + - Enables proper dependency mapping for internal build artifacts + +3. **CMake .d File Path Resolution** (_parse_d_file method, lines 356-364) + - **Critical Fix (October 2025)**: Changed context directory for path resolution + - CMake generates .d files with paths relative to the **target's build directory** (where the Makefile for that target is located), **NOT** the top-level build directory + - **Context Directory**: Parent directory of `CMakeFiles/` (e.g., `/build/tensorrt_llm/batch_manager/`) + - **Example**: For .d file at `/build/tensorrt_llm/batch_manager/CMakeFiles/target.dir/file.cpp.o.d`: + - **Context is**: `/build/tensorrt_llm/batch_manager/` (parent of CMakeFiles) + - **NOT**: `/build/` (top-level build directory) + - Relative path `../../../tensorrt_llm/...` resolves correctly from this context + - **Before Fix**: Used `d_file.parent` (adjacent to CMakeFiles directory) - caused 49.9% path resolution errors + - **After Fix**: Uses parent of CMakeFiles directory - reduced errors to 7.2% + - **Path Existence Tracking**: Scanner marks each artifact with `path_exists` metadata and reports non-existent paths in `path_issues.yml` + + **Algorithm:** + ```python + d_file_parts = d_file.parts + if 'CMakeFiles' in d_file_parts: + cmake_idx = d_file_parts.index('CMakeFiles') + context_dir = Path(*d_file_parts[:cmake_idx]) # Parent of CMakeFiles + else: + context_dir = self.build_dir # Fallback + ``` + +4. **3rdparty Submodule Resolution** (_parse_d_file method) + - When D files contain relative paths with submodule directories that don't exist relative to the build directory, the scanner attempts to resolve them from the configured submodules directory + - **Configuration**: Set via `THIRDPARTY_ROOT` constant in scan_build_artifacts.py + - **Default**: `TRTLLM_ROOT/3rdparty` (3 levels up from scanner location) + - **Customization**: Edit the `THIRDPARTY_ROOT` constant if dependencies move (e.g., to `${CMAKE_BINARY_DIR}/_deps/`) + - **Example**: `../../../../3rdparty/xgrammar/include/file.h` resolves to `{THIRDPARTY_ROOT}/xgrammar/include/file.h` + +**Resolution Flow:** +1. Collect artifacts from build directory +2. Try dpkg-query resolution (PRIMARY) +3. Fall back to YAML patterns (FALLBACK) +4. 
Generate known.yml, unknown.yml, and path_issues.yml reports + +## Files + +- `scan_build_artifacts.py` - Main scanner script +- `metadata/*.yml` - Dependency patterns +- `metadata/_template.yml` - Template for new dependencies +- `metadata/_schema.yml` - YAML validation schema +- `metadata/README.md` - Pattern documentation +- `tests/test_scan_build_artifacts.py` - Unit tests diff --git a/cpp/dependency_scan/metadata/README.md b/cpp/dependency_scan/metadata/README.md new file mode 100644 index 00000000000..f55990958e3 --- /dev/null +++ b/cpp/dependency_scan/metadata/README.md @@ -0,0 +1,761 @@ +# Dependency Patterns + +This directory contains pattern definitions for dependency detection in the TensorRT-LLM C++ dependency scanner. + +## Quick Start + +After running the scanner, check `scan_output/unknown.yml` for unmapped artifacts, then add patterns here. + +## Structure + +Each `.yml` file represents one or more dependencies: + +``` +metadata/ +├── _template.yml # Template for new dependencies +├── _schema.yml # JSON schema for validation +├── base.yml # Base system packages (list format) +├── cuda.yml # CUDA packages (list format) +├── tensorrt-llm.yml # Individual dependency +├── pytorch.yml +└── ... (23 total files) +``` + +## File Formats + +### Individual Dependency File + +Most dependencies use this format: + +```yaml +# metadata/pytorch.yml + +name: pytorch # Required: canonical name +description: PyTorch machine learning framework # Required: min 10 chars + +license: BSD-3-Clause # Optional: SPDX identifier +copyright: Copyright (c) PyTorch Contributors # Optional +homepage: https://pytorch.org/ # Optional: URL +source: container # Optional: how obtained (container, submodule, fetched) + +basename_matches: # Exact basename matches + - libtorch.so + - libc10.so + +linker_flags_matches: # Linker flags (-l flags) + - -ltorch_python + +directory_matches: # Directory path patterns + - ATen + - c10 + - torch +``` + +### List Format (base.yml, cuda.yml) + +System packages use a list format for compactness: + +```yaml +# metadata/base.yml or cuda.yml + +dependencies: + - name: libc6 + description: GNU C Library + source: container + basename_matches: + - libc.so.6 + linker_flags_matches: + - -lc + - -lpthread + - -ldl + directory_matches: [] + + - name: libstdc++6 + description: GNU C++ Library + source: container + basename_matches: + - libstdc++.so.6 + linker_flags_matches: + - -lstdc++ + directory_matches: [] + # ... more system libraries +``` + +## Field Names Reference + +**Current field names** (as of latest schema): +- `basename_matches` - Exact filename matches (not "patterns") +- `linker_flags_matches` - Linker flags (not "linker_flags") +- `directory_matches` - Path component patterns (not "path_components") + +## Iterative Pattern Development + +This section describes the recommended workflow for achieving high coverage through iterative pattern refinement. + +### Workflow Steps + +1. **Run the scanner** on your build directory: + ```bash + python scan_build_artifacts.py --build-dir /path/to/build + ``` + +2. **Examine scan_output/unknown.yml** to identify unmapped artifacts: + ```bash + cat scan_output/unknown.yml + ``` + + Example output: + ```yaml + summary: + count: 42 + action_required: Add patterns to YAML files in metadata/ for these artifacts + + artifacts: + - /build/3rdparty/newlib/include/foo.h + - /usr/local/cuda-13.0/include/cuda.h + - libfoo.so + - -lbar + ``` + +3. 
**Analyze patterns** in unknown artifacts: + - Group artifacts by logical dependency + - Identify common directory paths + - Note exact library names and linker flags + - Look for version patterns (e.g., cuda-12.9, cuda-13.0) + +4. **Add or update patterns** in metadata YAML files: + - For new dependencies: Copy `_template.yml` and create new file + - For existing dependencies: Update relevant YAML file + - Use the most powerful matching strategy (see below) + +5. **Validate your changes**: + ```bash + python scan_build_artifacts.py --validate + ``` + +6. **Re-run scanner** to verify improvements: + ```bash + python scan_build_artifacts.py + ``` + +7. **Check results**: + ```bash + # Check summary in scan_output/known.yml + grep "coverage:" scan_output/known.yml + + # Check remaining unknowns + grep "count:" scan_output/unknown.yml + ``` + +8. **Repeat** steps 2-7 until `scan_output/unknown.yml` shows `count: 0` + +### Achieving 100% Coverage + +The goal is to reduce unknown artifacts to zero. Key strategies: + +- **Start with directory_matches**: Most powerful pattern type (see below) +- **Use version-agnostic patterns**: Match across multiple versions (see next section) +- **Group related artifacts**: Single dependency file can match headers, libs, and linker flags +- **Test incrementally**: Add patterns for one dependency at a time +- **Validate frequently**: Catch syntax errors early with `--validate` + +## Version-Agnostic Pattern Matching + +For dependencies with multiple versions (e.g., CUDA 12.9, 13.0), use patterns that match all versions. + +### Problem + +Artifacts from different CUDA versions: +``` +/usr/local/cuda-12.9/include/cuda.h +/usr/local/cuda-13.0/include/cuda.h +/usr/local/cuda/include/cuda.h +``` + +### Solution: Version-Agnostic Patterns + +Use `directory_matches` with version-agnostic patterns: + +```yaml +# metadata/cuda.yml +name: cuda-cudart +description: CUDA Runtime Library + +directory_matches: + - cuda-12.9 # Matches /cuda-12.9/ paths + - cuda-13.0 # Matches /cuda-13.0/ paths + - cuda # Matches /cuda/ paths (generic fallback) +``` + +### When to Use This Approach + +- **Multiple versions installed**: Different CUDA/TensorRT versions in same environment +- **Version symlinks**: Generic paths like `/usr/local/cuda/` alongside versioned ones +- **Forward compatibility**: Pattern works for future versions without updates +- **Container evolution**: Handles version changes between container builds + +### Best Practices + +1. **List specific versions first**: More specific patterns take priority + ```yaml + directory_matches: + - cuda-12.9 # Specific version + - cuda-13.0 # Specific version + - cuda # Generic fallback + ``` + +2. **Use with basename_matches**: Combine with exact filename matching + ```yaml + basename_matches: + - libcudart.so.12 + - libcudart.so.13 + + directory_matches: + - cuda-12.9 + - cuda-13.0 + - cuda + ``` + +3. **Test across versions**: Verify patterns work with different installations + +4. 
**Document version ranges**: Add comments for clarity + ```yaml + directory_matches: + - cuda-12.9 # CUDA 12.9.x + - cuda-13.0 # CUDA 13.0.x + - cuda # Generic (all versions) + ``` + +## Adding Patterns + +### When You See Unknown Artifacts + +After running the scanner, check `scan_output/unknown.yml`: + +```yaml +summary: + count: 2 + action_required: Add patterns to YAML files in metadata/ for these artifacts + +artifacts: + - /build/3rdparty/newlib/include/foo.h + - libfoo.so +``` + +### Option A: Add to Existing Dependency + +If `libfoo.so` belongs to an existing dependency (e.g., `pytorch`): + +1. Open `metadata/pytorch.yml` +2. Add to the `basename_matches` list: + ```yaml + basename_matches: + - libtorch.so + - libfoo.so # ← Add here + ``` +3. Re-run scanner: + ```bash + python ../scan_build_artifacts.py + ``` + +### Option B: Create New Dependency + +If this is a new dependency: + +1. Copy the template: + ```bash + cd metadata/ + cp _template.yml foo-library.yml + ``` + +2. Edit the file: + ```yaml + name: foo-library + description: Foo library for data processing + source: submodule + + basename_matches: + - libfoo.so + - libfoo.a + + linker_flags_matches: + - -lfoo + + directory_matches: + - foo-library + ``` + +3. Validate and re-run: + ```bash + python ../scan_build_artifacts.py --validate + python ../scan_build_artifacts.py + ``` + +## Pattern Matching Behavior + +The scanner uses a **3-tier matching strategy**: + +### 1. Exact Pattern Matching (HIGH confidence) +Matches exact filenames or linker flags: + +**Basename matches:** +```yaml +basename_matches: + - libcudart.so.12 # Matches only "libcudart.so.12" exactly + - libcudart.so.12.0 # Matches only "libcudart.so.12.0" exactly +``` + +**Linker flags:** +```yaml +linker_flags_matches: + - -lpthread # Matches "-lpthread" in link.txt + - -lcudart # Matches "-lcudart" +``` + +### 2. Path Alias Matching (MEDIUM confidence) +Matches directory components in paths. **Now supports multi-directory patterns!** + +**Single component:** +```yaml +directory_matches: + - pytorch # Matches any path containing /pytorch/ + # Example: /build/pytorch/include/torch.h ✓ +``` + +**Multi-directory (NEW):** +```yaml +directory_matches: + - 3rdparty/cutlass # Matches /3rdparty/cutlass/ sequence + - external/NVIDIA/cutlass # Matches full /external/NVIDIA/cutlass/ sequence +``` + +**Matching rules:** +- Exact component match only (no substring matching) +- `"foo/bar"` matches `/home/foo/bar/file.h` ✓ +- `"foo/bar"` does NOT match `/home/foobar/file.h` ✗ +- `"oo/ba"` does NOT match `/foo/bar/file.h` ✗ +- Rightmost match wins if pattern appears multiple times in path + +### 3. Generic Inference (LOW confidence) +Fallback: extracts library name from `-lfoo` → `foo` + +### Pattern Matching Power Ranking + +**Most Powerful → Least Powerful:** + +1. **directory_matches** - Matches entire directories of headers/files + - Example: `directory_matches: [pytorch]` matches 4,822+ PyTorch headers + - Single pattern can cover hundreds or thousands of artifacts + +2. **basename_matches** - Matches specific library files + - Example: `basename_matches: [libtorch.so]` matches one library + - Good for targeting specific libraries + +3. **linker_flags_matches** - Matches linker flags in link.txt files + - Example: `linker_flags_matches: [-ltorch]` matches one linker flag + - Useful for libraries without headers in build + +**Recommendation:** Start with `directory_matches` for maximum coverage with minimal patterns. 
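+
+The matching rules above are easiest to see in code. The following is a minimal sketch of the whole-component matching idea only; the function name is illustrative, it is not the scanner's actual `PatternMatcher`, and pattern priority (e.g. "rightmost match wins") is omitted:
+
+```python
+from pathlib import PurePosixPath
+
+def directory_pattern_matches(pattern: str, path: str) -> bool:
+    """True if the pattern's components appear as a consecutive run of
+    whole path components (no substring matching)."""
+    wanted = [c for c in pattern.strip("/").split("/") if c]
+    parts = list(PurePosixPath(path).parts)
+    return any(parts[i:i + len(wanted)] == wanted
+               for i in range(len(parts) - len(wanted) + 1))
+
+assert directory_pattern_matches("foo/bar", "/home/foo/bar/file.h")
+assert directory_pattern_matches("3rdparty/cutlass", "/src/3rdparty/cutlass/include/gemm.h")
+assert not directory_pattern_matches("foo/bar", "/home/foobar/file.h")
+assert not directory_pattern_matches("oo/ba", "/foo/bar/file.h")
+```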
+ +## Required Fields + +Every dependency MUST have: + +```yaml +name: my-dep # Required: lowercase, hyphenated, + allowed +description: "..." # Required: minimum 10 characters +``` + +At least one pattern section is required: +- `basename_matches` (exact filenames) +- `linker_flags_matches` (-l flags) +- `directory_matches` (path components) + +## Optional Fields + +Recommended for attribution/licensing: + +```yaml +version: "1.0" # Optional: version string +license: "Apache-2.0" # Optional: SPDX identifier +copyright: "Copyright 2024 NVIDIA" # Optional: copyright notice +homepage: "https://example.com" # Optional: project URL +source: "submodule" # Optional: how obtained +``` + +Valid `source` values: +- `submodule` - Git submodules in 3rdparty/ directory +- `container` - Pre-installed in container image (e.g., PyTorch, CUDA) +- `fetched` - Downloaded from URL and built from source + +## Multi-Directory Pattern Examples + +### Example 1: Vendor Directory Boundaries + +```yaml +# metadata/cutlass.yml +name: cutlass +description: CUDA Templates for Linear Algebra Subroutines +source: submodule + +directory_matches: + - cutlass # Single: matches any /cutlass/ in path + - 3rdparty/cutlass # Multi: matches /3rdparty/cutlass/ sequence + - external/NVIDIA/cutlass # Multi: matches full sequence +``` + +**Why multi-directory?** Prevents false positives: +- `"cutlass"` alone might match `/other-project/cutlass/` (unwanted) +- `"3rdparty/cutlass"` is more specific and safer + +### Example 2: Nested Dependencies + +```yaml +# metadata/dlpack.yml +name: dlpack +description: Deep Learning Pack +source: submodule + +directory_matches: + - 3rdparty/xgrammar/3rdparty/dlpack # Nested submodule path +``` + +Matches `/build/3rdparty/xgrammar/3rdparty/dlpack/include/dlpack.h` + +## Finding Which File to Edit + +Search by library name: + +```bash +cd metadata/ +grep -r "libtorch.so" . +# Output: ./pytorch.yml: - libtorch.so +``` + +Search by dependency name: + +```bash +grep "^name: pytorch" *.yml +# Output: pytorch.yml:name: pytorch +``` + +List all dependencies: + +```bash +grep "^name:" *.yml | sort +``` + +Search in list format files (base.yml, cuda.yml): + +```bash +grep -A 5 "name: libc6" base.yml +``` + +## Validation + +### Manual Validation + +After adding patterns, validate the YAML structure: + +```bash +python ../scan_build_artifacts.py --validate +``` + +Expected output: + +``` +================================================================================ +YAML Validation +================================================================================ + +✓ base.yml:libc6 +✓ base.yml:libstdc++6 +✓ cuda.yml:cuda-cudart-dev +✓ pytorch.yml +✓ tensorrt-llm.yml +... + +================================================================================ +Results: 25/25 valid, 0/25 invalid +================================================================================ +``` + +### Re-run Scanner + +After adding patterns, re-run the scanner: + +```bash +python ../scan_build_artifacts.py +``` + +Check `scan_output/unknown.yml` - should have fewer (or zero) artifacts: + +```yaml +summary: + count: 0 # Improved from previous run! + coverage: 100.0% + +artifacts: [] +``` + +### Schema Validation + +The `_schema.yml` file defines validation rules: +- Required fields: `name`, `description` +- Field types (string, array, etc.) +- Field patterns (e.g., linker flags must start with `-l`) +- Minimum lengths +- Unique items in arrays + +## Common Mistakes + +### 1. Using Old Field Names + +```yaml +patterns: [...] 
# ❌ Wrong (old name) +basename_matches: [...] # ✓ Correct + +linker_flags: [...] # ❌ Wrong (old name) +linker_flags_matches: [...] # ✓ Correct + +path_components: [...] # ❌ Wrong (old name) +directory_matches: [...] # ✓ Correct +``` + +### 2. Missing Required Fields + +```yaml +name: my-dep # ✓ Required +description: "..." # ✓ Required (min 10 chars) +``` + +### 3. Empty Pattern Sections + +```yaml +basename_matches: [] # ❌ Need at least one pattern section +linker_flags_matches: [] +directory_matches: [] +``` + +Must have at least one of: `basename_matches`, `linker_flags_matches`, or `directory_matches` + +### 4. Wrong Linker Flag Format + +```yaml +linker_flags_matches: + - pthread # ❌ Wrong + - -lpthread # ✓ Correct (must start with -l) +``` + +### 5. Substring Matching in directory_matches + +```yaml +directory_matches: + - oo/ba # ❌ Won't match /foo/bar/ (no substring matching) + - foo/bar # ✓ Correct (exact component match) +``` + +### 6. Invalid source Field + +```yaml +source: apt # ❌ Wrong (old enum value) +source: container # ✓ Correct (new enum) +source: pip # ❌ Wrong (old enum value) +source: submodule # ✓ Correct (new enum) +``` + +### 7. Duplicate Patterns Across Files + +Scanner will warn if same pattern appears in multiple files: + +``` +Warning: Duplicate pattern 'libfoo.so' found in bar.yml +(previously mapped to 'foo', now 'bar') +``` + +Last loaded file wins (alphabetical order). Remove duplicates. + +### 8. Invalid Name Format + +```yaml +name: MyDep # ❌ Wrong (uppercase) +name: my_dep # ❌ Wrong (underscore) +name: my-dep # ✓ Correct (lowercase, hyphenated) +name: cuda-12 # ✓ Correct (numbers ok) +name: libstdc++6 # ✓ Correct (+ allowed) +``` + +## Troubleshooting + +### Issue: Unknown artifacts not resolving after adding pattern + +**Cause**: Pattern doesn't match artifact path. + +**Solution**: +1. Check exact artifact path in `scan_output/unknown.yml` +2. Use correct field names: `basename_matches`, not `patterns` +3. For directories, use `directory_matches` +4. Check for typos in pattern + +Example: +```yaml +# If unknown.yml shows: +artifacts: + - /build/pytorch/lib/libtorch.so.2.0 + +# Add exact match: +basename_matches: + - libtorch.so.2.0 + +# OR use directory matching: +directory_matches: + - pytorch +``` + +### Issue: Multi-directory pattern not working + +**Cause**: Substring matching expectations. + +**Solution**: +- Multi-directory patterns require **exact component matches** +- `"oo/ba"` will NOT match `/foo/bar/` +- Use full component names: `"foo/bar"` + +Example: +```yaml +directory_matches: + - vendor/cutlass # ✓ Matches /vendor/cutlass/ + - cutlass # ✓ Also works (single component) + - end/cutlass # ❌ Won't match /vendor/cutlass/ (no substring matching) +``` + +### Issue: dpkg.yml not found + +**Cause**: File was renamed to `base.yml`. + +**Solution**: +```bash +# Old (incorrect) +grep "pattern" metadata/dpkg.yml + +# New (correct) +grep "pattern" metadata/base.yml +``` + +### Issue: Validation fails with schema error + +**Cause**: YAML structure doesn't match schema. + +**Solution**: +1. Compare with `_template.yml` +2. Ensure required fields present (`name`, `description`) +3. Check linker flags start with `-l` +4. 
Use correct field names: `basename_matches`, `linker_flags_matches`, `directory_matches` + +Example error: +``` +❌ foo.yml: 'description' is too short (minimum 10 characters) +``` + +Fix: +```yaml +description: "Foo library for data processing" # At least 10 chars +``` + +### Issue: Coverage decreased after changes + +**Cause**: Removed or moved patterns incorrectly. + +**Solution**: +1. Check git diff to see what changed +2. Re-add removed patterns +3. Run validation to ensure no syntax errors + +```bash +git diff metadata/ +python ../scan_build_artifacts.py --validate +``` + +## Best Practices + +1. **One dependency per file** (except base.yml/cuda.yml for system libs) +2. **Use descriptive names**: `cuda-cudart-12` not `cudart12` +3. **Use multi-directory patterns** for vendored dependencies to avoid false positives +4. **Add metadata** (license, copyright, homepage) for attribution +5. **Validate after changes**: `python ../scan_build_artifacts.py --validate` +6. **Test coverage**: Re-run scanner after adding patterns +7. **Use correct field names**: `basename_matches`, not `patterns` +8. **Keep base.yml for system libraries only** (resolved via dpkg-query) +9. **Use `source: container`** for pre-installed packages (PyTorch, CUDA) +10. **Use `source: submodule`** for 3rdparty/ git submodules +11. **Start with directory_matches**: Most powerful pattern type for coverage +12. **Use version-agnostic patterns**: Match multiple versions with single pattern + +## Resolution Strategy + +The scanner uses a **two-tier resolution strategy**: + +### PRIMARY: dpkg-query +- System-installed packages +- High confidence +- Handles all CUDA, system libraries automatically + +### FALLBACK: YAML Patterns +Only used when dpkg-query doesn't know about the artifact: +1. Exact basename match → High confidence +2. Exact linker flag match → High confidence +3. Directory alias match → Medium confidence +4. Generic library inference → Low confidence + +**Key insight**: Most CUDA and system packages are resolved via dpkg-query (PRIMARY), not YAML patterns. This is why `cuda.yml` and `base.yml` are sparse - they only contain fallback patterns for artifacts dpkg doesn't know about. + +## Example: Complete Dependency File + +```yaml +# metadata/cutlass.yml + +name: cutlass +description: CUDA Templates for Linear Algebra Subroutines + +version: "3.5.0" +license: BSD-3-Clause +copyright: Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES +homepage: https://github.com/NVIDIA/cutlass +source: submodule + +basename_matches: + - libcutlass.a + +linker_flags_matches: + - -lcutlass + +directory_matches: + - cutlass + - 3rdparty/cutlass # Multi-directory: prevents false positives + - external/NVIDIA/cutlass # Multi-directory: vendor-specific path +``` + +## Schema Reference + +See `_schema.yml` for full JSON schema definition. + +Key constraints: +- `name`: Required, string, pattern `^[a-z0-9-+]+$`, min length 1 +- `description`: Required, string, min length 10 +- `version`: Optional, string, min length 1 +- `basename_matches`: Optional, array of strings, unique items +- `linker_flags_matches`: Optional, array of strings matching `^-l`, unique items +- `directory_matches`: Optional, array of strings, unique items (supports multi-directory) +- `source`: Optional, enum (submodule/container/fetched) + +At least one of `basename_matches`, `linker_flags_matches`, or `directory_matches` required. 
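+
+To spot-check a single metadata file outside the `--validate` flow, the same schema check can be reproduced with PyYAML and `jsonschema` (the scanner itself imports both). A minimal sketch, assuming it is run from `metadata/`; the dependency file name is just an example:
+
+```python
+import yaml
+from jsonschema import ValidationError, validate
+
+with open("_schema.yml") as f:
+    schema = yaml.safe_load(f)
+
+with open("cutlass.yml") as f:
+    dependency = yaml.safe_load(f)
+
+try:
+    validate(instance=dependency, schema=schema)
+    print("cutlass.yml: OK")
+except ValidationError as err:
+    print(f"cutlass.yml: {err.message}")
+```
+
+List-format files such as `base.yml` and `cuda.yml` keep multiple entries under a top-level `dependencies:` key, so each entry would be validated individually rather than the file as a whole.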
+ +## Support + +For issues or questions: +- Review `_schema.yml` for validation rules +- See `_template.yml` for new dependency template +- Run `python ../scan_build_artifacts.py --help` for CLI options +- Check scanner source code: `scan_build_artifacts.py` (PatternMatcher class, lines 620-926) +- Review output files: `scan_output/known.yml` and `scan_output/unknown.yml` +- See main README: `../README.md` for architecture and workflow details diff --git a/cpp/dependency_scan/metadata/_schema.yml b/cpp/dependency_scan/metadata/_schema.yml new file mode 100644 index 00000000000..dc0e82d8002 --- /dev/null +++ b/cpp/dependency_scan/metadata/_schema.yml @@ -0,0 +1,92 @@ +# JSON Schema for validating dependency YAML files +# Used by scanner to validate structure and required fields + +type: object + +required: + - name + - description + +properties: + name: + type: string + pattern: "^[a-z0-9-+]+$" + description: "Canonical dependency name (lowercase, hyphenated, + allowed)" + minLength: 1 + + version: + type: string + description: "Version string (semantic versioning recommended)" + minLength: 1 + + description: + type: string + description: "Brief description of the dependency" + minLength: 10 + + license: + type: string + description: "SPDX license identifier or license name" + + copyright: + type: string + description: "Copyright notice" + + homepage: + type: string + format: uri + description: "URL to project homepage or repository" + + source: + type: string + enum: ["submodule", "container", "fetched"] + description: | + How this dependency is obtained: + - submodule: Git submodules in 3rdparty/ directory + - container: Pre-installed in the container image (e.g., PyTorch) + - fetched: Downloaded from URL and built from source + + basename_matches: + type: array + items: + type: string + minLength: 1 + uniqueItems: true + description: "Exact basename matches (filename or library name)" + + linker_flags_matches: + type: array + items: + type: string + pattern: "^-l" + minLength: 3 + uniqueItems: true + description: "Linker flags (-l flags)" + + directory_matches: + type: array + items: + type: string + minLength: 1 + uniqueItems: true + description: | + Directory path patterns (single or multi-directory). + Supports exact consecutive component matching (no substring matching). 
+ + Examples: + - "pytorch" (single component: matches any path containing /pytorch/) + - "3rdparty/cutlass" (multi-directory: matches /3rdparty/cutlass/ sequence) + - "foo/bar/baz" (multi-directory: matches /foo/bar/baz/ sequence) + + Matching behavior: + - Exact component match only (no substrings) + - "foo/bar" matches "/home/foo/bar/file.h" ✓ + - "foo/bar" does NOT match "/home/foobar/file.h" ✗ + - "oo/ba" does NOT match "/foo/bar/file.h" ✗ + - Rightmost match wins if pattern appears multiple times in path + +# At least one pattern matching section required +anyOf: + - required: ["basename_matches"] + - required: ["linker_flags_matches"] + - required: ["directory_matches"] diff --git a/cpp/dependency_scan/metadata/_template.yml b/cpp/dependency_scan/metadata/_template.yml new file mode 100644 index 00000000000..17c1980d190 --- /dev/null +++ b/cpp/dependency_scan/metadata/_template.yml @@ -0,0 +1,14 @@ +name: my-dependency +description: Brief description of the dependency (minimum 10 characters) +license: '' +copyright: '' +homepage: '' +source: '' # submodule, container, or fetched +basename_matches: +- libexample.so +- libexample.so.1 +linker_flags_matches: +- -lexample +directory_matches: +- example_dir # Single component +- 3rdparty/example # Multi-directory (if bundled in vendor dir) diff --git a/cpp/dependency_scan/metadata/base.yml b/cpp/dependency_scan/metadata/base.yml new file mode 100644 index 00000000000..79581bca4f9 --- /dev/null +++ b/cpp/dependency_scan/metadata/base.yml @@ -0,0 +1,13 @@ +dependencies: +- name: libc6 + description: GNU C Library - shared libraries and system calls + source: container + path_components: [] + aliases: [] + basename_matches: [] + linker_flags_matches: + - -lc + - -ldl + - -lm + - -lpthread + - -lrt diff --git a/cpp/dependency_scan/metadata/cppzmq.yml b/cpp/dependency_scan/metadata/cppzmq.yml new file mode 100644 index 00000000000..7e119a567fe --- /dev/null +++ b/cpp/dependency_scan/metadata/cppzmq.yml @@ -0,0 +1,10 @@ +name: cppzmq +description: Header-only C++ binding for libzmq +license: MIT +copyright: Copyright (c) 2016-2024 zeromq.org +homepage: https://github.com/zeromq/cppzmq +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- cppzmq diff --git a/cpp/dependency_scan/metadata/cuda.yml b/cpp/dependency_scan/metadata/cuda.yml new file mode 100644 index 00000000000..5fae45fa764 --- /dev/null +++ b/cpp/dependency_scan/metadata/cuda.yml @@ -0,0 +1,22 @@ +dependencies: +- name: cuda-cudart-dev + description: CUDA Runtime API development libraries and headers + source: container + basename_matches: [] + linker_flags_matches: + - -lcudadevrt + directory_matches: + - cuda-12.9 + - cuda-13.0 + - cuda + - cooperative_groups + - cub + - thrust +- name: cuda-cudart-static + description: CUDA Runtime static library for device code linking + source: container + basename_matches: + - libcudart_static.a + linker_flags_matches: + - -lcudart_static + directory_matches: [] diff --git a/cpp/dependency_scan/metadata/cutlass.yml b/cpp/dependency_scan/metadata/cutlass.yml new file mode 100644 index 00000000000..a8b5e767064 --- /dev/null +++ b/cpp/dependency_scan/metadata/cutlass.yml @@ -0,0 +1,10 @@ +name: cutlass +description: CUDA Templates for Linear Algebra Subroutines +license: BSD-3-Clause +copyright: Copyright (c) 2017-2024 NVIDIA Corporation +homepage: https://github.com/NVIDIA/cutlass +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- cutlass diff --git 
a/cpp/dependency_scan/metadata/deep-ep.yml b/cpp/dependency_scan/metadata/deep-ep.yml new file mode 100644 index 00000000000..31bc48f03c5 --- /dev/null +++ b/cpp/dependency_scan/metadata/deep-ep.yml @@ -0,0 +1,7 @@ +name: deep-ep +description: DeepEP distributed execution platform for distributed training +source: fetched +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- deep_ep_download-src diff --git a/cpp/dependency_scan/metadata/deepgemm.yml b/cpp/dependency_scan/metadata/deepgemm.yml new file mode 100644 index 00000000000..af4f2350d20 --- /dev/null +++ b/cpp/dependency_scan/metadata/deepgemm.yml @@ -0,0 +1,7 @@ +name: deepgemm +description: DeepGEMM optimized matrix multiplication library +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- DeepGEMM diff --git a/cpp/dependency_scan/metadata/dlpack.yml b/cpp/dependency_scan/metadata/dlpack.yml new file mode 100644 index 00000000000..e2aad21669c --- /dev/null +++ b/cpp/dependency_scan/metadata/dlpack.yml @@ -0,0 +1,11 @@ +name: dlpack +description: Common in-memory tensor structure for deep learning frameworks (vendored + in xgrammar) +license: Apache-2.0 +copyright: Copyright 2017 by Contributors +homepage: https://github.com/dmlc/dlpack +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- dlpack diff --git a/cpp/dependency_scan/metadata/fmt.yml b/cpp/dependency_scan/metadata/fmt.yml new file mode 100644 index 00000000000..6d2f6ad28ed --- /dev/null +++ b/cpp/dependency_scan/metadata/fmt.yml @@ -0,0 +1,11 @@ +name: fmt +description: A modern formatting library for C++ +license: MIT (with exception) +copyright: Copyright (c) 2012-2024 Victor Zverovich +homepage: https://github.com/fmtlib/fmt +source: submodule +basename_matches: [] +linker_flags_matches: +- -lfmt +directory_matches: +- fmt diff --git a/cpp/dependency_scan/metadata/hedley.yml b/cpp/dependency_scan/metadata/hedley.yml new file mode 100644 index 00000000000..195080947a4 --- /dev/null +++ b/cpp/dependency_scan/metadata/hedley.yml @@ -0,0 +1,10 @@ +name: hedley +description: Header-only C/C++ compatibility library (vendored in nlohmann-json) +license: CC0-1.0 +copyright: Copyright 2016-2021 Evan Nemerson +homepage: https://nemequ.github.io/hedley/ +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- hedley diff --git a/cpp/dependency_scan/metadata/nanobind.yml b/cpp/dependency_scan/metadata/nanobind.yml new file mode 100644 index 00000000000..559aa4d534d --- /dev/null +++ b/cpp/dependency_scan/metadata/nanobind.yml @@ -0,0 +1,10 @@ +name: nanobind +description: Tiny and efficient C++/Python bindings +license: BSD-3-Clause +copyright: Copyright (c) 2022-2024 Wenzel Jakob +homepage: https://github.com/wjakob/nanobind +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- nanobind diff --git a/cpp/dependency_scan/metadata/nlohmann-json.yml b/cpp/dependency_scan/metadata/nlohmann-json.yml new file mode 100644 index 00000000000..267945c4b29 --- /dev/null +++ b/cpp/dependency_scan/metadata/nlohmann-json.yml @@ -0,0 +1,10 @@ +name: nlohmann-json +description: JSON for Modern C++ header-only library +license: MIT +copyright: Copyright (c) 2013-2024 Niels Lohmann +homepage: https://github.com/nlohmann/json +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- json diff --git a/cpp/dependency_scan/metadata/numa.yml b/cpp/dependency_scan/metadata/numa.yml new file mode 100644 index 
00000000000..75c2b9bc2a3 --- /dev/null +++ b/cpp/dependency_scan/metadata/numa.yml @@ -0,0 +1,8 @@ +name: libnuma1 +description: NUMA memory allocation library +source: container +path_components: [] +aliases: [] +basename_matches: [] +linker_flags_matches: +- -lnuma diff --git a/cpp/dependency_scan/metadata/nvshmem.yml b/cpp/dependency_scan/metadata/nvshmem.yml new file mode 100644 index 00000000000..dcb68972a3a --- /dev/null +++ b/cpp/dependency_scan/metadata/nvshmem.yml @@ -0,0 +1,8 @@ +name: nvshmem +description: NVIDIA Shared Memory (NVSHMEM) library for inter-GPU communication +license: NVIDIA +homepage: https://developer.nvidia.com/nvshmem +source: fetched +basename_matches: [] +linker_flags_matches: [] +directory_matches: [] diff --git a/cpp/dependency_scan/metadata/openmpi.yml b/cpp/dependency_scan/metadata/openmpi.yml new file mode 100644 index 00000000000..72cb1900193 --- /dev/null +++ b/cpp/dependency_scan/metadata/openmpi.yml @@ -0,0 +1,6 @@ +name: openmpi +description: openmpi libraries and components +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- ompi diff --git a/cpp/dependency_scan/metadata/picojson.yml b/cpp/dependency_scan/metadata/picojson.yml new file mode 100644 index 00000000000..adc23723630 --- /dev/null +++ b/cpp/dependency_scan/metadata/picojson.yml @@ -0,0 +1,10 @@ +name: picojson +description: Header-only JSON parser/serializer in C++ (vendored in xgrammar) +license: BSD-2-Clause +copyright: Copyright 2009-2010 Cybozu Labs, Inc., Copyright 2011-2014 Kazuho Oku +homepage: https://github.com/kazuho/picojson +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- picojson diff --git a/cpp/dependency_scan/metadata/pybind11.yml b/cpp/dependency_scan/metadata/pybind11.yml new file mode 100644 index 00000000000..b493bc6be3c --- /dev/null +++ b/cpp/dependency_scan/metadata/pybind11.yml @@ -0,0 +1,10 @@ +name: pybind11 +description: Seamless operability between C++11 and Python +license: BSD-3-Clause +copyright: Copyright (c) 2016-2024 Wenzel Jakob +homepage: https://github.com/pybind/pybind11 +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- pybind11 diff --git a/cpp/dependency_scan/metadata/pytorch.yml b/cpp/dependency_scan/metadata/pytorch.yml new file mode 100644 index 00000000000..e89b056e60a --- /dev/null +++ b/cpp/dependency_scan/metadata/pytorch.yml @@ -0,0 +1,15 @@ +name: pytorch +description: pytorch libraries and components +source: container +basename_matches: +- libc10.so +- libc10_cuda.so +- libtorch.so +- libtorch_python.so +linker_flags_matches: +- -ltorch_python +directory_matches: +- ATen +- c10 +- caffe2 +- torch diff --git a/cpp/dependency_scan/metadata/robin-map.yml b/cpp/dependency_scan/metadata/robin-map.yml new file mode 100644 index 00000000000..601cf08ef8b --- /dev/null +++ b/cpp/dependency_scan/metadata/robin-map.yml @@ -0,0 +1,14 @@ +name: robin-map +description: Fast hash map and hash set using robin hood hashing +license: MIT +copyright: Copyright (c) 2017 Thibaut Goetghebuer-Planchon +homepage: https://github.com/Tessil/robin-map +source: submodule +basename_matches: +- robin_map.h +- robin_hash.h +- robin_growth_policy.h +linker_flags_matches: [] +directory_matches: +- robin_map +- tsl diff --git a/cpp/dependency_scan/metadata/tensorrt-llm.yml b/cpp/dependency_scan/metadata/tensorrt-llm.yml new file mode 100644 index 00000000000..0766739b3b8 --- /dev/null +++ b/cpp/dependency_scan/metadata/tensorrt-llm.yml @@ -0,0 +1,12 @@ +name: tensorrt-llm 
+description: TensorRT-LLM core libraries and Python bindings built by this project +license: Apache-2.0 +homepage: https://github.com/NVIDIA/TensorRT-LLM +source: submodule +basename_matches: +- deep_ep_cpp_tllm.cpython-312-x86_64-linux-gnu.so +- deep_gemm_cpp_tllm.cpython-312-x86_64-linux-gnu.so +linker_flags_matches: +- -ltensorrt_llm +directory_matches: +- tensorrt_llm/cpp diff --git a/cpp/dependency_scan/metadata/tensorrt.yml b/cpp/dependency_scan/metadata/tensorrt.yml new file mode 100644 index 00000000000..b3c325c4c38 --- /dev/null +++ b/cpp/dependency_scan/metadata/tensorrt.yml @@ -0,0 +1,6 @@ +name: tensorrt +description: tensorrt libraries and components +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- tensorrt diff --git a/cpp/dependency_scan/metadata/ucx.yml b/cpp/dependency_scan/metadata/ucx.yml new file mode 100644 index 00000000000..95086c40f21 --- /dev/null +++ b/cpp/dependency_scan/metadata/ucx.yml @@ -0,0 +1,6 @@ +name: ucx +description: ucx libraries and components +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- ucx diff --git a/cpp/dependency_scan/metadata/ucxx.yml b/cpp/dependency_scan/metadata/ucxx.yml new file mode 100644 index 00000000000..19745c4619f --- /dev/null +++ b/cpp/dependency_scan/metadata/ucxx.yml @@ -0,0 +1,8 @@ +name: ucxx +description: C++ bindings for UCX (Unified Communication X) +homepage: https://github.com/rapidsai/ucxx +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- ucxx diff --git a/cpp/dependency_scan/metadata/xgrammar.yml b/cpp/dependency_scan/metadata/xgrammar.yml new file mode 100644 index 00000000000..e5ca1f6d805 --- /dev/null +++ b/cpp/dependency_scan/metadata/xgrammar.yml @@ -0,0 +1,8 @@ +name: xgrammar +description: XGrammar library for structured text generation +homepage: https://github.com/mlc-ai/xgrammar +source: submodule +basename_matches: [] +linker_flags_matches: [] +directory_matches: +- xgrammar diff --git a/cpp/dependency_scan/metadata/zeromq.yml b/cpp/dependency_scan/metadata/zeromq.yml new file mode 100644 index 00000000000..43a89b59bf4 --- /dev/null +++ b/cpp/dependency_scan/metadata/zeromq.yml @@ -0,0 +1,8 @@ +name: libzmq5 +description: ZeroMQ lightweight messaging kernel library +source: container +path_components: [] +aliases: [] +basename_matches: [] +linker_flags_matches: +- -lzmq diff --git a/cpp/dependency_scan/scan_build_artifacts.py b/cpp/dependency_scan/scan_build_artifacts.py new file mode 100755 index 00000000000..e9a608e52ee --- /dev/null +++ b/cpp/dependency_scan/scan_build_artifacts.py @@ -0,0 +1,1532 @@ +#!/usr/bin/env python3 +""" +Minimal Build Artifact Scanner for TensorRT-LLM + +Scans D files (headers), link.txt files (libraries), and wheels (binaries) +to generate a comprehensive dependency mapping report. 
+ +Resolution Strategy: + PRIMARY: dpkg-query for system packages + FALLBACK: YAML patterns from dependencies/ directory + +Output: + - known.yml: Successfully mapped artifacts grouped by dependency (paths only) + - unknown.yml: Unmapped artifacts needing pattern additions (paths only) + +Usage: + python scan_build_artifacts.py --build-dir build/ --output-dir scan_output/ + python scan_build_artifacts.py --validate # Validate YAML files +""" + +import argparse +import os +import re +import subprocess +import sys +import tempfile +import zipfile +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Set + +import yaml + +try: + from jsonschema import ValidationError, validate + JSONSCHEMA_AVAILABLE = True +except ImportError: + JSONSCHEMA_AVAILABLE = False + ValidationError = Exception # Fallback for type hints + +# Configuration: Submodules directory location +# This path points to the 3rdparty submodules directory. +# Change this constant if dependencies move to a different location. +# Current: TRTLLM_ROOT/3rdparty +# Future: May change to ${CMAKE_BINARY_DIR}/_deps/ +THIRDPARTY_ROOT = Path(__file__).parent.parent.parent / '3rdparty' + +# ============================================================================ +# MODULE 1: Data Models +# ============================================================================ + + +@dataclass +class Artifact: + """Represents a discovered build artifact (header, library, or binary)""" + path: str # Canonical resolved path + type: str # 'header', 'library', 'binary' + source: str # Which file discovered it (D file, link.txt, wheel) + context_dir: Optional[str] = None # For relative path resolution + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization""" + return asdict(self) + + +@dataclass +class Mapping: + """Represents an artifact-to-dependency mapping""" + artifact: Artifact + dependency: str # Canonical dependency name + confidence: str # 'high', 'medium', 'low' + strategy: str # Which resolution strategy succeeded + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization""" + result = asdict(self) + result['artifact'] = self.artifact.to_dict() + return result + + +# ============================================================================ +# MODULE 2: DpkgResolver (PRIMARY) +# ============================================================================ + + +class DpkgResolver: + """ + Resolves artifacts to packages using dpkg-query (system package manager). + + This is the PRIMARY resolution strategy for system-installed packages + (glibc, libstdc++, gcc, cuda-dev, etc.). + + Algorithm: + 1. For absolute paths: dpkg-query -S + 2. For -l flags: find_library_path() → dpkg-query -S + 3. Parse output: "package:arch: /path/to/file" + 4. Cache results to avoid repeated queries + 5. Normalize package names (remove :arch suffix, handle cuda packages) + """ + + def __init__(self): + self._cache: Dict[str, Optional[str]] = {} + self._lib_search_paths = self._get_library_search_paths() + + def _get_library_search_paths(self) -> List[str]: + """ + Get standard library search paths for resolving -l flags. 
+ + Returns system library directories in priority order: + - /lib/x86_64-linux-gnu + - /usr/lib/x86_64-linux-gnu + - /lib + - /usr/lib + - /usr/local/lib + """ + paths = [ + "/lib/x86_64-linux-gnu", + "/usr/lib/x86_64-linux-gnu", + "/lib", + "/usr/lib", + "/usr/local/lib", + ] + + # Add LD_LIBRARY_PATH if set + ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + if ld_library_path: + paths.extend(ld_library_path.split(":")) + + return [p for p in paths if os.path.isdir(p)] + + def find_library_path(self, lib_name: str) -> Optional[str]: + """ + Resolve linker flag (-lpthread) to actual library path. + + Algorithm: + 1. Strip -l prefix: "-lpthread" → "pthread" + 2. Try patterns: libpthread.so, libpthread.so.*, libpthread.a + 3. Search in standard library directories + 4. Return first match or None + + Examples: + -lpthread → /lib/x86_64-linux-gnu/libpthread.so.0 + -lm → /lib/x86_64-linux-gnu/libm.so.6 + -lstdc++ → /usr/lib/x86_64-linux-gnu/libstdc++.so.6 + """ + if lib_name.startswith("-l"): + lib_name = lib_name[2:] # Remove -l prefix + + # Try different library name patterns + patterns = [ + f"lib{lib_name}.so", + f"lib{lib_name}.so.*", + f"lib{lib_name}.a", + ] + + for search_path in self._lib_search_paths: + for pattern in patterns: + # Use glob to match version suffixes + import glob + matches = glob.glob(os.path.join(search_path, pattern)) + if matches: + # Return first match (highest priority) + return matches[0] + + return None + + def get_package(self, file_path: str) -> Optional[str]: + """ + Query dpkg for package owning the file. + + Algorithm: + 1. Check cache for previous result + 2. Handle -l flags: find_library_path() first + 3. Execute: dpkg-query -S + 4. Parse output: "package:arch: /path/to/file" + 5. Extract package name, remove architecture suffix + 6. Normalize CUDA packages: cuda-cccl-12-9 → cuda-cccl + 7. Cache result and return + + Examples: + /usr/include/c++/13/vector → libstdc++-13-dev + -lpthread → libc6 + /usr/local/cuda-12.9/include/cuda.h → cuda-cudart-dev-12-9 → cuda-cudart-dev + """ + # Check cache first + if file_path in self._cache: + return self._cache[file_path] + + # Handle linker flags + if file_path.startswith("-l"): + resolved_path = self.find_library_path(file_path) + if not resolved_path: + self._cache[file_path] = None + return None + file_path = resolved_path + + # Query dpkg + try: + result = subprocess.run(["dpkg-query", "-S", file_path], + capture_output=True, + text=True, + timeout=5) + + if result.returncode != 0: + self._cache[file_path] = None + return None + + # Parse output: "package:arch: /path/to/file" + output = result.stdout.strip() + if ":" in output: + package_part = output.split(":", 1)[0] + # Remove architecture suffix (package:amd64 → package) + package = package_part.split( + ":")[0] if ":" in package_part else package_part + + # Normalize CUDA packages + package = self._normalize_cuda_package(package) + + self._cache[file_path] = package + return package + + except (subprocess.TimeoutExpired, FileNotFoundError, Exception): + pass + + self._cache[file_path] = None + return None + + @staticmethod + def _normalize_cuda_package(package: str) -> str: + """ + Normalize CUDA package names by removing version suffixes. 
+ + Examples: + cuda-cccl-12-9 → cuda-cccl + cuda-cudart-dev-12-9 → cuda-cudart-dev + libcublas-dev-12-9 → libcublas-dev + libc6 → libc6 (no change) + """ + # Pattern: package-name-##-# → package-name + match = re.match(r"^(.+?)-(\d+)-(\d+)$", package) + if match: + base_name = match.group(1) + # Only normalize if it looks like a CUDA/NVIDIA package + if any(x in base_name for x in [ + "cuda", "cublas", "curand", "cusolver", "cusparse", + "nvjitlink", "nvinfer" + ]): + return base_name + + return package + + +# ============================================================================ +# MODULE 3: ArtifactCollector +# ============================================================================ + + +class ArtifactCollector: + """ + Collects artifacts from D files (headers), link.txt files (libraries), and wheels (binaries). + + Args: + build_dir: Path to build directory to scan + """ + + def __init__(self, build_dir: Path): + self.build_dir = build_dir + + def collect_all(self) -> List[Artifact]: + """ + Collect all artifacts from build directory. + + Algorithm: + 1. Find all *.d files → parse headers + 2. Find all link.txt files → parse libraries + 3. Find all *.whl files → extract and scan binaries + 4. Return combined deduplicated list + """ + artifacts = [] + + # Collect from D files + d_files = list(self.build_dir.rglob("*.d")) + for d_file in d_files: + artifacts.extend(self._parse_d_file(d_file)) + + # Collect from link files + link_files = list(self.build_dir.rglob("link.txt")) + for link_file in link_files: + artifacts.extend(self._parse_link_file(link_file)) + + # Collect from wheels + wheel_files = list(self.build_dir.rglob("*.whl")) + for wheel_file in wheel_files: + artifacts.extend(self._scan_wheel(wheel_file)) + + # Deduplicate by path + seen = set() + unique_artifacts = [] + for artifact in artifacts: + if artifact.path not in seen: + seen.add(artifact.path) + unique_artifacts.append(artifact) + + return unique_artifacts + + def _parse_d_file(self, d_file: Path) -> List[Artifact]: + """ + Parse CMake dependency file (.d) to extract header dependencies. + + Algorithm: + 1. Read file content + 2. Handle line continuations (backslash at end) + 3. Split by whitespace to get all paths + 4. Skip first token (target: header1 header2 ...) + 5. Strip trailing colons from paths (handles malformed .d files) + 6. Resolve relative paths from depfile's parent directory + 7. Filter out non-existent paths + 8. Canonicalize with os.path.realpath() + + Malformed .d File Handling: + Some CMake-generated .d files contain paths with trailing colons. + Example: '/usr/include/stdc-predef.h:' (should be '/usr/include/stdc-predef.h') + + This is caused by incorrect formatting in CMake's dependency tracking. + The parser strips trailing colons using rstrip(':') to handle these cases, + preventing duplicate artifacts and improving accuracy. 
+ + Example D file: + ``` + build/foo.o: /usr/include/stdio.h \ + ../include/myheader.h \ + /usr/local/cuda/include/cuda.h + ``` + """ + + artifacts = [] + + try: + content = d_file.read_text(encoding='utf-8', errors='ignore') + except Exception: + return artifacts + + # Handle line continuations + content = content.replace("\\\n", " ").replace("\\\r\n", " ") + + # Split by whitespace + tokens = content.split() + + # Skip first token (target:) + if not tokens or not tokens[0].endswith(":"): + return artifacts + + header_paths = tokens[1:] + # CMake .d files use paths relative to the target's build directory (where Makefile is) + # This is the parent of the CMakeFiles directory + d_file_parts = d_file.parts + if 'CMakeFiles' in d_file_parts: + cmake_idx = d_file_parts.index('CMakeFiles') + context_dir = Path(*d_file_parts[:cmake_idx]) + else: + # Fallback to build_dir if no CMakeFiles in path + context_dir = self.build_dir + + for header_path in header_paths: + # Strip trailing colons from paths (malformed .d files) + # Some .d files have malformed entries like '/usr/include/stdc-predef.h:' + header_path = header_path.rstrip(':') + if not header_path: + continue + + # Store original relative path before joining (for 3rdparty resolution) + original_header_path = header_path + + # Resolve relative paths + if not os.path.isabs(header_path): + header_path = os.path.join(context_dir, header_path) + + # Canonicalize path + try: + canonical_path = os.path.realpath(header_path) + + # If path doesn't exist and contains submodules dir pattern, try resolving from submodules directory + submodules_pattern = f'{THIRDPARTY_ROOT.name}/' + if not os.path.exists( + canonical_path + ) and submodules_pattern in original_header_path: + # Extract the part starting from FIRST submodules dir pattern in ORIGINAL path (handles nested dirs) + # e.g., ../../../../3rdparty/xgrammar/3rdparty/picojson/picojson.h + # should extract: xgrammar/3rdparty/picojson/picojson.h + idx = original_header_path.find(submodules_pattern) + if idx != -1: + relative_part = original_header_path[ + idx + len(submodules_pattern):] + # Resolve from THIRDPARTY_ROOT constant + alternative_path = THIRDPARTY_ROOT / relative_part + alternative_canonical = os.path.realpath( + str(alternative_path)) + + if os.path.exists(alternative_canonical): + canonical_path = alternative_canonical + + # If path doesn't exist and contains '_deps/', try resolving from build root + if not os.path.exists( + canonical_path) and '_deps/' in header_path: + # Extract the part starting from '_deps/' + parts = header_path.split('_deps/') + if len(parts) >= 2: + # Find build root (go up from cpp/dependency_scan to cpp/build) + build_root = self.build_dir + alternative_path = build_root / '_deps' / parts[-1] + alternative_canonical = os.path.realpath( + str(alternative_path)) + + if os.path.exists(alternative_canonical): + canonical_path = alternative_canonical + + # If path doesn't exist, try searching for it within build directory + # This handles cases like nvshmem-build/ or other CMake ExternalProject paths + if not os.path.exists(canonical_path) and not os.path.isabs( + header_path): + # Extract base filename to search for + basename = os.path.basename(header_path) + # Try to find the file within the build directory + import subprocess + try: + result = subprocess.run([ + 'find', + str(self.build_dir), '-name', basename, '-type', 'f' + ], + capture_output=True, + text=True, + timeout=5) + if result.returncode == 0 and result.stdout.strip(): + matches = 
result.stdout.strip().split('\n') + # Try to find match with similar relative path structure + for match in matches: + if header_path in match or match.endswith( + header_path): + canonical_path = os.path.realpath(match) + break + # If no exact match, use first match + if not os.path.exists(canonical_path) and matches: + canonical_path = os.path.realpath(matches[0]) + except Exception: + pass + + # Include all headers (even non-existent) for complete coverage + artifacts.append( + Artifact(path=canonical_path, + type='header', + source=str(d_file), + context_dir=str(context_dir), + metadata={ + 'original_path': original_header_path, + 'path_exists': os.path.exists(canonical_path) + })) + except Exception: + continue + + return artifacts + + def _parse_link_file(self, link_file: Path) -> List[Artifact]: + """ + Parse CMake link.txt file to extract library dependencies. + + Algorithm: + 1. Read file content (single line linker command) + 2. Split by whitespace + 3. Extract: + a) -l flags (e.g., -lpthread) + b) Absolute library paths (*.a, *.so) + c) @response.rsp files → recursively expand + d) CMakeFiles linker artifacts with embedded -Wl flags (special handling) + 4. Deduplicate and return + + CMakeFiles Linker Artifact Extraction: + CMake generates special linker artifacts in CMakeFiles directories that + encode library dependencies in the path itself. + + Pattern: /path/CMakeFiles/foo.dir/-Wl,-soname,libtest.so.1 + + These paths contain embedded linker flags (-Wl,-soname) that specify + the library's soname. The parser extracts the library name (libtest.so.1), + strips the 'lib' prefix, and converts it to a linker flag (-ltest). + + This enables proper dependency mapping for internal build artifacts that + would otherwise be unmapped. + + Example link.txt: + ``` + /usr/bin/c++ ... -lpthread -ldl /path/to/libfoo.a @response.rsp + ``` + """ + + artifacts = [] + + try: + content = link_file.read_text(encoding='utf-8', errors='ignore') + except Exception: + return artifacts + + tokens = content.split() + context_dir = link_file.parent + + for token in tokens: + # Handle response files (@response.rsp) + if token.startswith("@"): + rsp_file = Path(context_dir) / token[1:] + if rsp_file.exists(): + artifacts.extend(self._parse_link_file(rsp_file)) + continue + + # Handle CMakeFiles linker artifacts with embedded -Wl flags + # Pattern: /path/CMakeFiles/foo.dir/-Wl,-soname,libbar.so + # These encode library dependencies in the path itself + if '/CMakeFiles/' in token and '/-Wl,' in token: + # Extract library name from -Wl,-soname,libfoo.so + match = re.search(r'-Wl,-soname,(.+)$', token) + if match: + lib_name = match.group(1) + # Add as linker flag artifact for pattern matching + artifacts.append( + Artifact( + path= + f"-l{lib_name.replace('lib', '').split('.')[0]}", + type='library', + source=str(link_file), + context_dir=str(context_dir), + metadata={ + 'linker_flag': True, + 'cmake_linker_artifact': True, + 'original_token': token, + 'library_name': lib_name + })) + continue + + # Handle -l flags + if token.startswith("-l"): + artifacts.append( + Artifact(path=token, + type='library', + source=str(link_file), + context_dir=str(context_dir), + metadata={'linker_flag': True})) + continue + + # Handle absolute library paths + if token.endswith((".a", ".so")) or ".so." 
in token: + # Resolve relative paths + if not os.path.isabs(token): + token = os.path.join(context_dir, token) + + try: + canonical_path = os.path.realpath(token) + # Include all library paths (even non-existent) for complete coverage + artifacts.append( + Artifact(path=canonical_path, + type='library', + source=str(link_file), + context_dir=str(context_dir), + metadata={ + 'static': token.endswith('.a'), + 'path_exists': + os.path.exists(canonical_path) + })) + except Exception: + continue + + return artifacts + + def _scan_wheel(self, wheel_file: Path) -> List[Artifact]: + """ + Extract wheel and scan for binary dependencies (.so files). + + Algorithm: + 1. Create temp directory + 2. Extract wheel (ZIP format) + 3. Find all *.so files + 4. For each .so: + a) Run readelf -d to get NEEDED entries + b) Extract required library names + 5. Cleanup temp directory + 6. Return binary artifacts with NEEDED metadata + + Example: + tensorrt_llm-0.1.0-py3-none-any.whl contains: + - tensorrt_llm/libs/libnvinfer_plugin_tensorrt_llm.so + - Uses: libcudart.so.12, libnvinfer.so.10, libstdc++.so.6 + """ + artifacts = [] + + # Create temp directory for extraction + with tempfile.TemporaryDirectory() as temp_dir: + try: + # Extract wheel + with zipfile.ZipFile(wheel_file, 'r') as zip_ref: + zip_ref.extractall(temp_dir) + + # Find all .so files + temp_path = Path(temp_dir) + so_files = list(temp_path.rglob("*.so")) + list( + temp_path.rglob("*.so.*")) + + for so_file in so_files: + # Get NEEDED entries with readelf + needed_libs = self._get_needed_libraries(so_file) + + # Create artifact for the .so file itself + artifacts.append( + Artifact(path=str(so_file.relative_to(temp_path)), + type='binary', + source=str(wheel_file), + metadata={ + 'wheel': wheel_file.name, + 'needed': needed_libs + })) + + # Create artifacts for NEEDED libraries + for needed_lib in needed_libs: + artifacts.append( + Artifact(path=needed_lib, + type='library', + source=str(wheel_file), + metadata={ + 'from_binary': + str(so_file.relative_to(temp_path)), + 'dynamic_dependency': + True + })) + + except Exception: + pass + + return artifacts + + @staticmethod + def _get_needed_libraries(binary_path: Path) -> List[str]: + """ + Extract NEEDED entries from ELF binary using readelf. + + Algorithm: + 1. Execute: readelf -d + 2. Parse output for lines containing "(NEEDED)" + 3. Extract library names from "Shared library: [libfoo.so]" + 4. Return list of library names + + Example output: + ``` + 0x0000000000000001 (NEEDED) Shared library: [libcudart.so.12] + 0x0000000000000001 (NEEDED) Shared library: [libstdc++.so.6] + ``` + """ + needed = [] + + try: + result = subprocess.run( + ["readelf", "-d", str(binary_path)], + capture_output=True, + text=True, + timeout=10) + + if result.returncode == 0: + for line in result.stdout.split("\n"): + if "(NEEDED)" in line and "Shared library:" in line: + # Extract library name between brackets + match = re.search(r'\[([^\]]+)\]', line) + if match: + needed.append(match.group(1)) + + except Exception: + pass + + return needed + + +# ============================================================================ +# MODULE 4: PatternMatcher (FALLBACK) +# ============================================================================ + + +class PatternMatcher: + """ + Resolves artifacts using YAML files from dependencies/ directory (FALLBACK strategy for non-dpkg packages). + + Provides 3-tier resolution strategy: + 1. Exact pattern matching (basename_matches and linker_flags_matches) + 2. 
Path matching (directory_matches - rightmost match wins) + 3. Generic library name inference (fallback) + + YAML files are loaded from dependencies/ directory: + - Individual dependency files (e.g., tensorrt-llm.yml) + - Files with dependencies: list format (e.g., base.yml, cuda.yml) + - All *.yml files except those starting with '_' + """ + + def __init__(self, metadata_dir: Path): + """ + Initialize PatternMatcher by loading YAML files from dependencies/ directory. + + Args: + metadata_dir: Path to directory containing YAML dependency files + """ + self.pattern_mappings: Dict[str, str] = {} + self.path_aliases: Dict[str, str] = {} + self.known_names: Set[str] = set() # Track all known dependency names + self._schema = None + self._duplicate_warnings: Set[str] = set() + + # Vendor directory magic strings (industry-standard patterns) + self.vendor_patterns = [ + '3rdparty/', 'third-party/', 'thirdparty/', 'third_party/', + 'external/', 'externals/', 'vendor/', 'vendored/', 'deps/' + ] + + # Load schema if available + schema_file = metadata_dir / "_schema.yml" + if schema_file.exists() and JSONSCHEMA_AVAILABLE: + with open(schema_file, 'r') as f: + self._schema = yaml.safe_load(f) + + # Load all YAML files + self._load_yaml_files(metadata_dir) + + def _load_yaml_files(self, metadata_dir: Path): + """ + Load all YAML files from dependencies/ directory. + + Algorithm: + 1. Find all *.yml files (except those starting with '_') + 2. Load each file and validate against schema + 3. Handle two formats: + - Individual dependency files (with name, basename_matches, etc.) + - Files with dependencies: list format (e.g., base.yml, cuda.yml) + 4. Merge all basename_matches/linker_flags_matches into pattern_mappings + 5. Merge all directory_matches into path_aliases + 6. Warn about validation errors and duplicates + """ + yaml_files = sorted([ + f for f in metadata_dir.glob("*.yml") if not f.name.startswith("_") + ]) + + for yaml_file in yaml_files: + try: + with open(yaml_file, 'r') as f: + data = yaml.safe_load(f) + + # Handle files with dependencies list (e.g., dpkg.yml, cuda.yml) + if "dependencies" in data and isinstance( + data["dependencies"], list): + for dep_data in data["dependencies"]: + self._process_dependency(dep_data, yaml_file) + # Handle individual dependency files + elif "name" in data: + self._process_dependency(data, yaml_file) + else: + print( + f"Warning: Skipping {yaml_file.name} - unrecognized format", + file=sys.stderr) + + except yaml.YAMLError as e: + print(f"Warning: Failed to parse {yaml_file.name}: {e}", + file=sys.stderr) + except Exception as e: + print(f"Warning: Error loading {yaml_file.name}: {e}", + file=sys.stderr) + + def _process_dependency(self, dep_data: Dict[str, Any], source_file: Path): + """ + Process a single dependency definition and merge into internal structures. 
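+
+        basename_matches and linker_flags_matches are merged into pattern_mappings,
+        directory_matches into path_aliases, and every dependency name is recorded in
+        known_names for vendor-boundary checks. Duplicate patterns emit a warning and
+        the most recently loaded definition wins.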
+ + Args: + dep_data: Dictionary containing dependency definition + source_file: Path to YAML file being processed (for error messages) + """ + # Validate against schema if available + if self._schema and JSONSCHEMA_AVAILABLE: + try: + validate(instance=dep_data, schema=self._schema) + except ValidationError as e: + print( + f"Warning: Validation error in {source_file.name}: {e.message}", + file=sys.stderr) + # Continue processing despite validation errors + + dependency_name = dep_data.get("name") + if not dependency_name: + print(f"Warning: Missing 'name' field in {source_file.name}", + file=sys.stderr) + return + + # Merge basename_matches into pattern_mappings + basename_matches = dep_data.get("basename_matches", []) + for pattern in basename_matches: + if pattern in self.pattern_mappings and pattern not in self._duplicate_warnings: + print( + f"Warning: Duplicate basename match '{pattern}' found in {source_file.name} " + f"(previously mapped to '{self.pattern_mappings[pattern]}', now '{dependency_name}')", + file=sys.stderr) + self._duplicate_warnings.add(pattern) + self.pattern_mappings[pattern] = dependency_name + + # Merge linker_flags_matches into pattern_mappings + linker_flags_matches = dep_data.get("linker_flags_matches", []) + for flag in linker_flags_matches: + if flag in self.pattern_mappings and flag not in self._duplicate_warnings: + print( + f"Warning: Duplicate linker flag '{flag}' found in {source_file.name} " + f"(previously mapped to '{self.pattern_mappings[flag]}', now '{dependency_name}')", + file=sys.stderr) + self._duplicate_warnings.add(flag) + self.pattern_mappings[flag] = dependency_name + + # Merge directory_matches into path_aliases + directory_matches = dep_data.get("directory_matches", []) + for component in directory_matches: + if component in self.path_aliases and component not in self._duplicate_warnings: + print( + f"Warning: Duplicate path component '{component}' found in {source_file.name} " + f"(previously mapped to '{self.path_aliases[component]}', now '{dependency_name}')", + file=sys.stderr) + self._duplicate_warnings.add(component) + self.path_aliases[component] = dependency_name + + # Track known dependency names (for nested vendor detection) + self.known_names.add(dependency_name.lower()) + for component in directory_matches: + self.known_names.add(component.lower()) + + def match(self, artifact: Artifact) -> Optional[Mapping]: + """ + Match artifact using 3-tier strategy. + + Algorithm: + 1. Try pattern matching (basename_matches and linker_flags_matches - exact match only - highest confidence) + 2. Try path matching (directory_matches - rightmost directory wins) + 3. Try generic library name inference (lowest confidence) + 4. Return first match or None + """ + # Strategy 1: Pattern matching (exact match only) + result = self._match_patterns(artifact) + if result: + return result + + # Strategy 2: Path matching (directory_matches) + result = self._match_path_alias(artifact) + if result: + return result + + # Strategy 3: Generic library name inference (fallback) + result = self._match_generic_library(artifact) + if result: + return result + + return None + + def _match_patterns(self, artifact: Artifact) -> Optional[Mapping]: + """ + Match using pattern_mappings dictionary (exact match only). + + Only performs exact matching against basename_matches from YAML files. + Substring matching has been removed to prevent false positives. + + Algorithm: + 1. Try exact match on basename (e.g., "libcudart.so.12") + 2. 
Try exact match on full path (e.g., "-lpthread") + 3. Return mapped dependency with HIGH confidence + + Examples: + -lpthread → libc6 (exact basename match) + libcudart.so.12 → cuda-cudart-12 (exact basename match) + + Note: For partial path matching, use directory_matches in YAML files. + Directory matches work on whole directory names (e.g., "fmt/" in path). + """ + basename = os.path.basename(artifact.path) + + # Try exact match on basename + if basename in self.pattern_mappings: + return Mapping(artifact=artifact, + dependency=self.pattern_mappings[basename], + confidence='high', + strategy='exact_pattern_match', + metadata={'matched_key': basename}) + + # Try exact match on full path (for -l flags) + if artifact.path in self.pattern_mappings: + return Mapping(artifact=artifact, + dependency=self.pattern_mappings[artifact.path], + confidence='high', + strategy='exact_pattern_match', + metadata={'matched_key': artifact.path}) + + # Substring matching removed - too high risk for false positives + # Use directory_matches instead for safe partial path matching + + return None + + def _match_path_alias(self, artifact: Artifact) -> Optional[Mapping]: + """ + Match using path_aliases (supports single and multi-directory patterns). + + Algorithm: + 1. Split path by '/' to get directory components (filter empty strings) + 2. For each pattern in path_aliases: + a. Normalize pattern (strip leading/trailing slashes) + b. Split pattern into components + c. Search for exact consecutive component sequence in path + d. Rightmost match wins + 3. Return first match with MEDIUM confidence + + Examples: + Single component: + Pattern: "pytorch" + /foo/bar/pytorch/include/torch/torch.h → pytorch (matches "pytorch") + + Multi-directory: + Pattern: "foo/bar" + /home/foo/bar/file.h → foo/bar (matches consecutive "foo", "bar") + /home/foobar/file.h → NO MATCH ("foobar" != ["foo", "bar"]) + + Rightmost wins: + Pattern: "foo/bar" + /foo/bar/baz/foo/bar/qux.h → matches at rightmost position + """ + # Split path into components (filter out empty strings from leading slash) + path_components = [c for c in artifact.path.split('/') if c] + + best_match = None + best_position = -1 # Rightmost wins (highest position) + + for pattern, dependency in self.path_aliases.items(): + # Normalize pattern: strip slashes, split into components + pattern_normalized = pattern.strip('/') + + # Handle empty pattern after stripping + if not pattern_normalized: + continue + + pattern_components = pattern_normalized.split('/') + pattern_len = len(pattern_components) + + # Search for exact consecutive sequence match using sliding window + # Iterate through all possible positions in the path + for i in range(len(path_components) - pattern_len + 1): + # Check if pattern_components match exactly at position i + if path_components[i:i + pattern_len] == pattern_components: + # Found exact match at position i + # Keep rightmost match (highest i value wins) + if i > best_position: + best_position = i + best_match = Mapping(artifact=artifact, + dependency=dependency, + confidence='medium', + strategy='path_alias', + metadata={ + 'matched_pattern': + pattern, + 'matched_sequence': + '/'.join(pattern_components), + 'position': + i + }) + + return best_match + + def _match_generic_library(self, artifact: Artifact) -> Optional[Mapping]: + """ + Generic library name inference (FALLBACK with LOW confidence). + + Algorithm: + 1. Check if library type + 2. Extract basename + 3. Strip lib prefix and .so/.a suffix + 4. 
Return as dependency with LOW confidence + + Examples: + libfoobar.so → foobar (low confidence) + libtest.so.1 → test (low confidence) + """ + if artifact.type != 'library': + return None + + basename = os.path.basename(artifact.path) + + # Try to extract library name + match = re.match(r'^lib([a-zA-Z0-9_-]+)\.(?:so|a)(?:\.\d+)*$', basename) + if match: + return Mapping(artifact=artifact, + dependency=match.group(1), + confidence='low', + strategy='generic_library_inference', + metadata={'inferred_from': basename}) + + return None + + def extract_vendor_components(self, path: str) -> List[tuple]: + """ + Extract all vendor components from a path using magic strings. + + Args: + path: Artifact path to scan + + Returns: + List of (pattern, component) tuples for each vendor boundary found + + Example: + "/3rdparty/xgrammar/3rdparty/picojson/file.h" → + [("3rdparty/", "xgrammar"), ("3rdparty/", "picojson")] + """ + components = [] + path_lower = path.lower() + + for pattern in self.vendor_patterns: + idx = 0 + while True: + idx = path_lower.find(pattern, idx) + if idx == -1: + break + + # Extract component name after the pattern + start = idx + len(pattern) + end = path_lower.find('/', start) + if end == -1: + end = len(path_lower) + + component = path[start:end] # Use original case + if component: # Skip empty components + components.append((pattern, component)) + + idx = end + + return components + + def find_unknown_vendor_boundaries(self, + artifact: Artifact) -> Optional[str]: + """ + Check if artifact contains any unknown vendor boundaries. + + Unified vendor boundary policy: ANY component following a vendor pattern + (3rdparty/, vendor/, external/, etc.) MUST be in the known allowlist. + + Returns: + Name of unknown vendor boundary component, or None if all are known + + Examples: + Path: "/3rdparty/xgrammar/src/file.h" + Components: ["xgrammar"] + If "xgrammar" is known → returns None (OK) + + Path: "/3rdparty/unknown-lib/file.h" + Components: ["unknown-lib"] + If "unknown-lib" is NOT known → returns "unknown-lib" (REJECT) + + Path: "/3rdparty/xgrammar/3rdparty/picojson/file.h" + Components: ["xgrammar", "picojson"] + If "picojson" is NOT known → returns "picojson" (REJECT) + """ + components = self.extract_vendor_components(artifact.path) + + # Check ALL vendor boundaries (rightmost has priority for detection) + for pattern, component in reversed(components): + component_lower = component.lower() + if component_lower not in self.known_names: + return component + + return None + + +# ============================================================================ +# MODULE 5: OutputGenerator +# ============================================================================ + + +class OutputGenerator: + """ + Generates YAML reports for known and unknown artifacts. + + Output files: + - known.yml: Successfully mapped artifacts grouped by dependency (paths only) + - unknown.yml: Unmapped artifacts requiring pattern additions (paths only) + """ + + @staticmethod + def generate(mappings: List[Mapping], artifacts: List[Artifact], + output_dir: Path): + """ + Generate known.yml and unknown.yml with simplified structure (paths only). + + Algorithm: + 1. Create output directory if needed + 2. Separate mapped vs unmapped artifacts + 3. Group known artifacts by dependency (dict of lists) + 4. Sort dependencies by count (most artifacts first) + 5. 
Write YAML files with simplified structure + """ + output_dir.mkdir(parents=True, exist_ok=True) + + # Separate known vs unknown mappings + # Artifacts mapped to "unknown" should be treated as truly unknown + known_mappings = [m for m in mappings if m.dependency != 'unknown'] + unknown_mappings = [m for m in mappings if m.dependency == 'unknown'] + + # Build mapping lookup (only for truly known) + mapped_paths = {m.artifact.path for m in known_mappings} + + # Known artifacts - simplified structure (dependency -> list of paths) + known = {} + for mapping in known_mappings: + dep = mapping.dependency + if dep not in known: + known[dep] = [] + known[dep].append(mapping.artifact.path) + + # Sort dependencies by count (most artifacts first) + known_sorted = dict( + sorted(known.items(), key=lambda x: len(x[1]), reverse=True)) + + # Unknown artifacts - simplified structure (flat list of paths) + unknown_paths = [] + + # Add artifacts that weren't mapped at all + for artifact in artifacts: + if artifact.path not in mapped_paths and not any( + m.artifact.path == artifact.path for m in unknown_mappings): + unknown_paths.append(artifact.path) + + # Add artifacts mapped to "unknown" + for mapping in unknown_mappings: + unknown_paths.append(mapping.artifact.path) + + # Write outputs + known_file = output_dir / 'known.yml' + unknown_file = output_dir / 'unknown.yml' + + with open(known_file, 'w') as f: + yaml.dump( + { + 'summary': { + 'total_artifacts': + len(artifacts), + 'mapped': + len(known_mappings), + 'unmapped': + len(unknown_paths), + 'coverage': + f"{len(known_mappings) / len(artifacts) * 100:.1f}%" + if artifacts else "0%", + 'unique_dependencies': + len(known) + }, + 'dependencies': known_sorted + }, + f, + default_flow_style=False, + sort_keys=False) + + with open(unknown_file, 'w') as f: + yaml.dump( + { + 'summary': { + 'count': + len(unknown_paths), + 'action_required': + 'Add patterns to YAML files in dependencies/ for these artifacts' + }, + 'artifacts': unknown_paths + }, + f, + default_flow_style=False, + sort_keys=False) + + # Generate path_issues.yml for non-existent paths + path_issues_file = output_dir / 'path_issues.yml' + non_existent_paths = [] + + for artifact in artifacts: + # Check if path_exists metadata is False + # Exclude libraries since they don't have meaningful original_path metadata + if (artifact.metadata + and not artifact.metadata.get('path_exists', True) + and artifact.type != 'library'): + non_existent_paths.append({ + 'resolved_path': + artifact.path, + 'type': + artifact.type, + 'source': + artifact.source, + 'd_file_path': + artifact.metadata.get('original_path', 'N/A') + }) + + with open(path_issues_file, 'w') as f: + yaml.dump( + { + 'summary': { + 'count': + len(non_existent_paths), + 'total_artifacts': + len(artifacts), + 'percentage': + f"{len(non_existent_paths) / len(artifacts) * 100:.1f}%" + if artifacts else "0%", + 'note': + 'These header paths were resolved from .d files but do not exist in the filesystem (libraries excluded)' + }, + 'non_existent_paths': non_existent_paths + }, + f, + default_flow_style=False, + sort_keys=False) + + return known_file, unknown_file + + +# ============================================================================ +# MODULE 6: Main Orchestration +# ============================================================================ + + +def validate_yaml_files(metadata_dir: Path) -> bool: + """ + Validate YAML files without running the scanner. 
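+
+    Each entry is validated against _schema.yml using jsonschema. Both individual
+    dependency files and files using the "dependencies:" list format are supported;
+    files whose names start with '_' are skipped.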
+ + Args: + metadata_dir: Path to dependencies directory + + Returns: + True if all files are valid, False otherwise + """ + print("=" * 80) + print("YAML Validation") + print("=" * 80) + print(f"Metadata directory: {metadata_dir}") + print() + + # Check if jsonschema is available + if not JSONSCHEMA_AVAILABLE: + print("Warning: jsonschema not installed, skipping validation", + file=sys.stderr) + print("Install with: pip install jsonschema") + return False + + # Load schema + schema_file = metadata_dir / "_schema.yml" + if not schema_file.exists(): + print(f"Error: Schema file not found: {schema_file}", file=sys.stderr) + return False + + with open(schema_file, 'r') as f: + schema = yaml.safe_load(f) + + # Validate all YAML files + yaml_files = sorted( + [f for f in metadata_dir.glob("*.yml") if not f.name.startswith("_")]) + total = 0 + valid = 0 + invalid = 0 + + for yaml_file in yaml_files: + try: + with open(yaml_file, 'r') as f: + data = yaml.safe_load(f) + + # Handle files with dependencies list format (e.g., base.yml, cuda.yml) + if "dependencies" in data and isinstance(data["dependencies"], + list): + for dep_data in data["dependencies"]: + total += 1 + try: + validate(instance=dep_data, schema=schema) + print( + f"✓ {yaml_file.name}:{dep_data.get('name', 'unknown')}" + ) + valid += 1 + except ValidationError as e: + print( + f"✗ {yaml_file.name}:{dep_data.get('name', 'unknown')}: {e.message}", + file=sys.stderr) + invalid += 1 + # Handle individual dependency files + elif "name" in data: + total += 1 + try: + validate(instance=data, schema=schema) + print(f"✓ {yaml_file.name}") + valid += 1 + except ValidationError as e: + print(f"✗ {yaml_file.name}: {e.message}", file=sys.stderr) + invalid += 1 + + except yaml.YAMLError as e: + print(f"✗ {yaml_file.name}: YAML parse error: {e}", file=sys.stderr) + invalid += 1 + except Exception as e: + print(f"✗ {yaml_file.name}: {e}", file=sys.stderr) + invalid += 1 + + print() + print("=" * 80) + print(f"Results: {valid}/{total} valid, {invalid}/{total} invalid") + print("=" * 80) + + return invalid == 0 + + +def main(): + """ + Main entry point for build artifact scanner. + + Algorithm: + 1. Parse command-line arguments + 2. Validate inputs (build-dir exists, dependencies/ exists) + 3. Collect artifacts using ArtifactCollector + 4. Resolve using DpkgResolver (PRIMARY) + 5. Resolve remaining using PatternMatcher (FALLBACK) + 6. Generate reports using OutputGenerator + 7. Print summary statistics + """ + parser = argparse.ArgumentParser( + description='Minimal Build Artifact Scanner for TensorRT-LLM', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Scan default build directory + python scan_build_artifacts.py + + # Scan custom build directory with custom output + python scan_build_artifacts.py --build-dir build/Release --output-dir scan_output/ + + # Validate YAML files without scanning + python scan_build_artifacts.py --validate + + # Use custom dependencies directory + python scan_build_artifacts.py --dependencies-dir custom_dependencies/ + """) + + parser.add_argument( + '--build-dir', + type=Path, + default=Path(__file__).parent.parent / 'build', + help= + 'Build directory to scan for C++ artifacts (default: ../build/). 
Note: wheels are in ../../build/' + ) + + parser.add_argument( + '--output-dir', + type=Path, + default=Path('scan_output'), + help='Output directory for reports (default: scan_output/)') + + parser.add_argument( + '--metadata-dir', + type=Path, + default=Path(__file__).parent / 'metadata', + help= + 'Path to metadata directory containing YAML files (default: ./metadata/)' + ) + + parser.add_argument('--validate', + action='store_true', + help='Validate YAML files without running scanner') + + args = parser.parse_args() + + # Handle --validate flag + if args.validate: + success = validate_yaml_files(args.metadata_dir) + sys.exit(0 if success else 1) + + # Validate inputs + if not args.build_dir.exists(): + print(f"Error: Build directory not found: {args.build_dir}", + file=sys.stderr) + sys.exit(1) + + if not args.metadata_dir.exists(): + print(f"Error: Metadata directory not found: {args.metadata_dir}", + file=sys.stderr) + sys.exit(1) + + print("=" * 80) + print("TensorRT-LLM Build Artifact Scanner") + print("=" * 80) + print(f"Build directory: {args.build_dir}") + print(f"Metadata directory: {args.metadata_dir}") + print(f"Output directory: {args.output_dir}") + print() + + # Step 1: Collect artifacts + print("[1/4] Collecting artifacts...") + collector = ArtifactCollector(args.build_dir) + artifacts = collector.collect_all() + print(f" Found {len(artifacts)} unique artifacts") + print(f" - Headers: {sum(1 for a in artifacts if a.type == 'header')}") + print( + f" - Libraries: {sum(1 for a in artifacts if a.type == 'library')}") + print(f" - Binaries: {sum(1 for a in artifacts if a.type == 'binary')}") + print() + + # Step 2: Resolve with dpkg (PRIMARY) + print("[2/4] Resolving with dpkg-query (PRIMARY strategy)...") + dpkg_resolver = DpkgResolver() + dpkg_mappings = [] + + for artifact in artifacts: + package = dpkg_resolver.get_package(artifact.path) + if package: + dpkg_mappings.append( + Mapping(artifact=artifact, + dependency=package, + confidence='high', + strategy='dpkg-query', + metadata={'dpkg_package': package})) + + print( + f" Resolved {len(dpkg_mappings)} artifacts via dpkg ({len(dpkg_mappings) / len(artifacts) * 100:.1f}%)" + ) + print() + + # Step 3: Resolve remaining with YAML patterns (FALLBACK) + print("[3/4] Resolving remaining with YAML patterns (FALLBACK strategy)...") + pattern_matcher = PatternMatcher(args.metadata_dir) + pattern_mappings = [] + + dpkg_resolved_paths = {m.artifact.path for m in dpkg_mappings} + remaining_artifacts = [ + a for a in artifacts if a.path not in dpkg_resolved_paths + ] + + for artifact in remaining_artifacts: + mapping = pattern_matcher.match(artifact) + if mapping: + # Check for unknown vendor boundaries BEFORE accepting the mapping + unknown_vendor = pattern_matcher.find_unknown_vendor_boundaries( + artifact) + if unknown_vendor: + # Artifact has unknown vendor boundary - treat as unknown + # Don't add to pattern_mappings (will be in unknown.yml) + print( + f" WARNING: Unknown vendor boundary '{unknown_vendor}' found in: {artifact.path}", + file=sys.stderr) + else: + pattern_mappings.append(mapping) + + print( + f" Resolved {len(pattern_mappings)} additional artifacts via patterns ({len(pattern_mappings) / len(artifacts) * 100:.1f}%)" + ) + print() + + # Step 4: Generate reports + print("[4/4] Generating reports...") + all_mappings = dpkg_mappings + pattern_mappings + known_file, unknown_file = OutputGenerator.generate(all_mappings, artifacts, + args.output_dir) + + print(f" Reports written to:") + print(f" - {known_file}") + 
print(f" - {unknown_file}") + print() + + # Summary + # Separate known vs unknown (artifacts mapped to "unknown" are treated as unknown) + known_mappings = [m for m in all_mappings if m.dependency != 'unknown'] + unknown_mappings = [m for m in all_mappings if m.dependency == 'unknown'] + + total_known = len(known_mappings) + total_unknown = len(artifacts) - total_known + coverage = (total_known / len(artifacts) * 100) if artifacts else 0 + + # Count dpkg/pattern strategies among known mappings only + dpkg_known = sum(1 for m in dpkg_mappings if m.dependency != 'unknown') + pattern_known = sum(1 for m in pattern_mappings + if m.dependency != 'unknown') + + print("=" * 80) + print("SUMMARY") + print("=" * 80) + print(f"Total artifacts: {len(artifacts)}") + print( + f" Mapped (dpkg): {dpkg_known} ({dpkg_known / len(artifacts) * 100:.1f}%)" + ) + print( + f" Mapped (patterns): {pattern_known} ({pattern_known / len(artifacts) * 100:.1f}%)" + ) + print( + f" Unknown: {total_unknown} ({total_unknown / len(artifacts) * 100:.1f}%)" + ) + print(f"Coverage: {coverage:.1f}%") + print() + + # Confidence breakdown (only for known mappings) + high_conf = sum(1 for m in known_mappings if m.confidence == 'high') + med_conf = sum(1 for m in known_mappings if m.confidence == 'medium') + low_conf = sum(1 for m in known_mappings if m.confidence == 'low') + + if known_mappings: + print("Confidence Distribution:") + print( + f" High: {high_conf} ({high_conf / len(known_mappings) * 100:.1f}%)" + ) + print( + f" Medium: {med_conf} ({med_conf / len(known_mappings) * 100:.1f}%)" + ) + print( + f" Low: {low_conf} ({low_conf / len(known_mappings) * 100:.1f}%)") + else: + print("Confidence Distribution:") + print(" High: 0") + print(" Medium: 0") + print(" Low: 0") + print() + + if total_unknown > 0: + print(f"ACTION REQUIRED: {total_unknown} artifacts unknown") + if len(unknown_mappings) > 0: + print( + f" {len(unknown_mappings)} artifacts matched generic fallback (need specific patterns)" + ) + print(f" Review {unknown_file}") + print(f" Add missing patterns to YAML files in {args.metadata_dir}") + print(f" Re-run scanner to improve coverage") + else: + print("SUCCESS: All artifacts mapped!") + + print("=" * 80) + + +if __name__ == '__main__': + main() diff --git a/cpp/dependency_scan/tests/test_scan_build_artifacts.py b/cpp/dependency_scan/tests/test_scan_build_artifacts.py new file mode 100644 index 00000000000..f9d47fdf35b --- /dev/null +++ b/cpp/dependency_scan/tests/test_scan_build_artifacts.py @@ -0,0 +1,1711 @@ +#!/usr/bin/env python3 +""" +Unit tests for scan_build_artifacts.py + +Tests all 5 modules: + 1. DpkgResolver + 2. ArtifactCollector + 3. PatternMatcher (YAML-based) + 4. OutputGenerator + 5. 
Main CLI + +Run with: python -m pytest test_scan_build_artifacts.py -v +""" + +# Import modules under test +import os +import sys +import zipfile +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest +import yaml + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from scan_build_artifacts import (Artifact, ArtifactCollector, DpkgResolver, + Mapping, OutputGenerator, PatternMatcher) + +# ============================================================================ +# Test Data Models +# ============================================================================ + + +def test_artifact_creation(): + """Test Artifact dataclass creation and serialization""" + artifact = Artifact(path="/usr/include/stdio.h", + type="header", + source="test.d", + context_dir="/build", + metadata={"test": "value"}) + + assert artifact.path == "/usr/include/stdio.h" + assert artifact.type == "header" + assert artifact.source == "test.d" + + # Test serialization + data = artifact.to_dict() + assert data['path'] == "/usr/include/stdio.h" + assert data['metadata']['test'] == "value" + + +def test_mapping_creation(): + """Test Mapping dataclass creation and serialization""" + artifact = Artifact(path="/usr/lib/libfoo.so", + type="library", + source="link.txt") + + mapping = Mapping(artifact=artifact, + dependency="foo", + confidence="high", + strategy="dpkg-query", + metadata={"test": "meta"}) + + assert mapping.dependency == "foo" + assert mapping.confidence == "high" + + # Test serialization + data = mapping.to_dict() + assert data['dependency'] == "foo" + assert data['artifact']['path'] == "/usr/lib/libfoo.so" + + +# ============================================================================ +# Test DpkgResolver +# ============================================================================ + + +class TestDpkgResolver: + """Test cases for DpkgResolver class""" + + def test_get_library_search_paths(self): + """Test _get_library_search_paths returns expected directories""" + resolver = DpkgResolver() + paths = resolver._lib_search_paths + + # Should contain standard paths + assert any("/lib/x86_64-linux-gnu" in p for p in paths) + assert any("/usr/lib/x86_64-linux-gnu" in p for p in paths) + + def test_find_library_path_pthread(self): + """Test find_library_path resolves -lpthread""" + resolver = DpkgResolver() + result = resolver.find_library_path("-lpthread") + + # Should find libpthread.so* in system paths + if result: # May not exist on all systems + assert "pthread" in result + assert result.endswith((".so", ".a")) or ".so." 
in result + + @patch('subprocess.run') + def test_get_package_success(self, mock_run): + """Test get_package successfully parses dpkg-query output""" + # Mock dpkg-query output + mock_run.return_value = Mock( + returncode=0, + stdout="libc6:amd64: /lib/x86_64-linux-gnu/libc.so.6\n") + + resolver = DpkgResolver() + package = resolver.get_package("/lib/x86_64-linux-gnu/libc.so.6") + + assert package == "libc6" + mock_run.assert_called_once() + + @patch('subprocess.run') + def test_get_package_not_found(self, mock_run): + """Test get_package returns None for non-existent file""" + # Mock dpkg-query failure + mock_run.return_value = Mock(returncode=1, stdout="") + + resolver = DpkgResolver() + package = resolver.get_package("/nonexistent/file.so") + + assert package is None + + @patch('subprocess.run') + def test_get_package_caching(self, mock_run): + """Test get_package caches results""" + mock_run.return_value = Mock( + returncode=0, + stdout="libc6:amd64: /lib/x86_64-linux-gnu/libc.so.6\n") + + resolver = DpkgResolver() + + # First call + pkg1 = resolver.get_package("/lib/x86_64-linux-gnu/libc.so.6") + # Second call (should use cache) + pkg2 = resolver.get_package("/lib/x86_64-linux-gnu/libc.so.6") + + assert pkg1 == pkg2 + # Should only call dpkg-query once + assert mock_run.call_count == 1 + + def test_normalize_cuda_package(self): + """Test _normalize_cuda_package removes version suffixes""" + resolver = DpkgResolver() + + # Should normalize CUDA packages + assert resolver._normalize_cuda_package("cuda-cccl-12-9") == "cuda-cccl" + assert resolver._normalize_cuda_package( + "cuda-cudart-dev-12-9") == "cuda-cudart-dev" + assert resolver._normalize_cuda_package( + "libcublas-dev-12-9") == "libcublas-dev" + + # Should NOT normalize non-CUDA packages + assert resolver._normalize_cuda_package("libc6") == "libc6" + assert resolver._normalize_cuda_package( + "python3-12-1") == "python3-12-1" + + @patch('subprocess.run') + def test_get_package_linker_flag(self, mock_run): + """Test get_package handles -l flags by resolving first""" + # First call: find_library_path (no mock needed, uses real filesystem) + # Second call: dpkg-query + mock_run.return_value = Mock( + returncode=0, + stdout="libc6:amd64: /lib/x86_64-linux-gnu/libpthread.so.0\n") + + resolver = DpkgResolver() + + # Mock find_library_path to return a known path + with patch.object(resolver, + 'find_library_path', + return_value='/lib/x86_64-linux-gnu/libpthread.so.0'): + package = resolver.get_package("-lpthread") + + # Should resolve to libc6 + if package: # May fail if system doesn't have dpkg + assert package == "libc6" + + +# ============================================================================ +# Test ArtifactCollector +# ============================================================================ + + +class TestArtifactCollector: + """Test cases for ArtifactCollector class""" + + def test_parse_d_file_basic(self, tmp_path): + """Test _parse_d_file parses basic D file""" + # Create test D file + d_file = tmp_path / "test.d" + d_file.write_text( + "build/foo.o: /usr/include/stdio.h /usr/include/stdlib.h\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should find 2 headers (if they exist on system) + assert len(artifacts) >= 0 # May be 0 if headers don't exist + for artifact in artifacts: + assert artifact.type == "header" + assert artifact.source == str(d_file) + + def test_parse_d_file_line_continuations(self, tmp_path): + """Test _parse_d_file handles line 
continuations""" + # Create test D file with line continuations + d_file = tmp_path / "test.d" + d_file.write_text( + "build/foo.o: /usr/include/stdio.h \\\n /usr/include/stdlib.h\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should handle continuations correctly + assert isinstance(artifacts, list) + + def test_parse_d_file_relative_paths(self, tmp_path): + """Test _parse_d_file resolves relative paths""" + # Create test header + include_dir = tmp_path / "include" + include_dir.mkdir() + test_header = include_dir / "test.h" + test_header.write_text("// test header\n") + + # Create D file with relative path + d_file = tmp_path / "build" / "test.d" + d_file.parent.mkdir(parents=True, exist_ok=True) + d_file.write_text(f"build/foo.o: ../include/test.h\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should resolve relative path + if artifacts: + assert any("test.h" in a.path for a in artifacts) + + def test_parse_d_file_trailing_colons(self, tmp_path): + """Test _parse_d_file strips trailing colons from malformed paths""" + # Create test D file with trailing colons (malformed .d file) + d_file = tmp_path / "test.d" + d_file.write_text( + "build/foo.o: /usr/include/stdio.h: /usr/include/stdlib.h:\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should strip trailing colons from paths + for artifact in artifacts: + assert not artifact.path.endswith(':') + assert artifact.path # No empty strings + + def test_parse_link_file_basic(self, tmp_path): + """Test _parse_link_file parses link.txt""" + # Create test link file + link_file = tmp_path / "link.txt" + link_file.write_text( + "/usr/bin/c++ -o foo -lpthread -ldl /path/to/libfoo.a\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_link_file(link_file) + + # Should find -l flags + assert any(a.path == "-lpthread" for a in artifacts) + assert any(a.path == "-ldl" for a in artifacts) + + def test_parse_link_file_response_files(self, tmp_path): + """Test _parse_link_file handles @response.rsp recursively""" + # Create response file + rsp_file = tmp_path / "response.rsp" + rsp_file.write_text("-lpthread -ldl\n") + + # Create link file referencing response file + link_file = tmp_path / "link.txt" + link_file.write_text(f"/usr/bin/c++ -o foo @response.rsp\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_link_file(link_file) + + # Should recursively expand response file + assert any(a.path == "-lpthread" for a in artifacts) + + def test_parse_link_file_static_libraries(self, tmp_path): + """Test _parse_link_file handles .a files""" + # Create static library + static_lib = tmp_path / "libtest.a" + static_lib.write_text("fake static library\n") + + # Create link file + link_file = tmp_path / "link.txt" + link_file.write_text(f"/usr/bin/c++ -o foo {static_lib}\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_link_file(link_file) + + # Should find static library + assert any("libtest.a" in a.path for a in artifacts) + if artifacts: + assert artifacts[0].metadata.get('static') == True + + def test_parse_link_file_cmake_linker_artifacts(self, tmp_path): + """Test _parse_link_file handles CMakeFiles artifacts with -Wl,-soname""" + # Create link file with CMakeFiles linker artifact + link_file = tmp_path / "link.txt" + link_file.write_text( + "/usr/bin/c++ -o foo /build/CMakeFiles/foo.dir/-Wl,-soname,libtest.so.1\n" + ) 
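+
+        # The CMakeFiles token above embeds the soname in the path; the parser is
+        # expected to strip the "lib" prefix and version suffix from "libtest.so.1"
+        # and report it as the linker flag "-ltest".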
+ + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_link_file(link_file) + + # Should extract library name from CMakeFiles artifact + linker_artifacts = [ + a for a in artifacts if a.metadata.get('cmake_linker_artifact') + ] + assert len(linker_artifacts) == 1 + + artifact = linker_artifacts[0] + assert artifact.path == "-ltest" # Extracted from libtest.so.1 + assert artifact.type == "library" + assert artifact.metadata['linker_flag'] == True + assert artifact.metadata['cmake_linker_artifact'] == True + assert artifact.metadata['library_name'] == "libtest.so.1" + + def test_get_needed_libraries(self, tmp_path): + """Test _get_needed_libraries extracts NEEDED entries""" + # Mock readelf output + mock_output = """ +Dynamic section at offset 0x1000 contains 20 entries: + Tag Type Name/Value + 0x0000000000000001 (NEEDED) Shared library: [libcudart.so.12] + 0x0000000000000001 (NEEDED) Shared library: [libstdc++.so.6] + 0x000000000000000e (SONAME) Library soname: [libtest.so] + """ + + with patch('subprocess.run') as mock_run: + mock_run.return_value = Mock(returncode=0, stdout=mock_output) + + needed = ArtifactCollector._get_needed_libraries(tmp_path / + "fake.so") + + assert "libcudart.so.12" in needed + assert "libstdc++.so.6" in needed + + def test_scan_wheel(self, tmp_path): + """Test _scan_wheel extracts and scans wheel""" + # Create fake wheel + wheel_file = tmp_path / "test-1.0-py3-none-any.whl" + + with zipfile.ZipFile(wheel_file, 'w') as zf: + # Add fake .so file + zf.writestr("tensorrt_llm/libs/libtest.so", b"fake binary") + + collector = ArtifactCollector(tmp_path) + + with patch.object(ArtifactCollector, + '_get_needed_libraries', + return_value=['libfoo.so']): + artifacts = collector._scan_wheel(wheel_file) + + # Should find binary artifact + assert any(a.type == "binary" for a in artifacts) + assert any(a.type == "library" and a.path == "libfoo.so" + for a in artifacts) + + def test_collect_all_deduplication(self, tmp_path): + """Test collect_all deduplicates artifacts""" + # Create two D files with overlapping headers + d1 = tmp_path / "test1.d" + d2 = tmp_path / "test2.d" + + d1.write_text("build/foo.o: /usr/include/stdio.h\n") + d2.write_text("build/bar.o: /usr/include/stdio.h\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector.collect_all() + + # Should deduplicate by path + paths = [a.path for a in artifacts] + assert len(paths) == len(set(paths)) # No duplicates + + def test_parse_d_file_relative_path_resolution(self, tmp_path): + """Test _parse_d_file resolves paths relative to build_dir, not .d file's parent + + This test verifies the fix for the critical bug where relative paths in CMake .d files + were being resolved from the wrong directory. CMake .d files use paths relative to the + build root, not the .d file's directory. 
+ """ + # Create a realistic directory structure: + # tmp_path (build_dir) + # ├── CMakeFiles/ + # │ └── deeply/ + # │ └── nested/ + # │ └── target.dir/ + # │ └── test.d (contains relative paths) + # └── tensorrt_llm/ + # └── runtime/ + # └── layerProfiler.h (target file) + + # Create the target header file + runtime_dir = tmp_path / "tensorrt_llm" / "runtime" + runtime_dir.mkdir(parents=True) + target_header = runtime_dir / "layerProfiler.h" + target_header.write_text("// TensorRT-LLM runtime header\n") + + # Create deeply nested .d file directory + d_file_dir = tmp_path / "CMakeFiles" / "deeply" / "nested" / "target.dir" + d_file_dir.mkdir(parents=True, exist_ok=True) + d_file = d_file_dir / "test.d" + + # Write .d file with relative path from BUILD ROOT, not from .d file location + # From build root: tensorrt_llm/runtime/layerProfiler.h + # From .d file location: ../../../../../tensorrt_llm/runtime/layerProfiler.h would be wrong + d_file.write_text("build/foo.o: tensorrt_llm/runtime/layerProfiler.h\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should resolve relative path using build_dir as context, not d_file.parent + assert len(artifacts) == 1, f"Expected 1 artifact, got {len(artifacts)}" + artifact = artifacts[0] + + # Verify the path was resolved correctly + assert "layerProfiler.h" in artifact.path + assert artifact.type == "header" + assert artifact.source == str(d_file) + + # Verify context_dir is build_dir, not d_file.parent + assert artifact.context_dir == str(tmp_path) + + # Verify path exists (resolved correctly) + assert artifact.metadata.get('path_exists') is True + + # Verify the resolved path points to the actual file + canonical_target = os.path.realpath(str(target_header)) + assert artifact.path == canonical_target + + def test_parse_d_file_build_root_context(self, tmp_path): + """Test that context_dir used for path resolution is self.build_dir + + This test verifies that regardless of where the .d file is located in the build tree, + all relative paths are resolved from the same build root directory. 
+ """ + # Create headers at different locations + header1_dir = tmp_path / "include" + header1_dir.mkdir() + header1 = header1_dir / "test1.h" + header1.write_text("// test1 header\n") + + header2_dir = tmp_path / "src" / "common" + header2_dir.mkdir(parents=True) + header2 = header2_dir / "test2.h" + header2.write_text("// test2 header\n") + + # Create .d files at different depths + d_file1 = tmp_path / "shallow.d" + d_file1.write_text("build/obj1.o: include/test1.h\n") + + d_file2_dir = tmp_path / "CMakeFiles" / "deep" / "nested" + d_file2_dir.mkdir(parents=True) + d_file2 = d_file2_dir / "deep.d" + d_file2.write_text("build/obj2.o: src/common/test2.h\n") + + collector = ArtifactCollector(tmp_path) + + # Parse both .d files + artifacts1 = collector._parse_d_file(d_file1) + artifacts2 = collector._parse_d_file(d_file2) + + # Both should resolve successfully + assert len(artifacts1) == 1 + assert len(artifacts2) == 1 + + # Both should use build_dir as context_dir + assert artifacts1[0].context_dir == str(tmp_path) + assert artifacts2[0].context_dir == str(tmp_path) + + # Both should resolve to correct absolute paths + assert artifacts1[0].metadata.get('path_exists') is True + assert artifacts2[0].metadata.get('path_exists') is True + + # Verify correct files were found + assert "test1.h" in artifacts1[0].path + assert "test2.h" in artifacts2[0].path + + def test_parse_d_file_cross_project_paths(self, tmp_path): + """Test _parse_d_file handles paths that reference directories outside the build + + This test verifies that paths referencing parent directories are resolved correctly + relative to build root, and that non-existent paths are properly marked. + """ + # Create a project structure with 3rdparty dependencies: + # tmp_path (build_dir) + # ├── CMakeFiles/ + # │ └── target.dir/ + # │ └── test.d + # And simulate references to: + # ../../../../triton_backend/src/model.h (doesn't exist) + # ../../../tensorrt_llm/common/logger.h (exists) + + # Create an existing header in a sibling directory structure + parent_dir = tmp_path.parent + trtllm_dir = parent_dir / "tensorrt_llm" / "common" + trtllm_dir.mkdir(parents=True, exist_ok=True) + existing_header = trtllm_dir / "logger.h" + existing_header.write_text("// Logger header\n") + + # Create .d file with both existing and non-existing cross-project paths + d_file_dir = tmp_path / "CMakeFiles" / "target.dir" + d_file_dir.mkdir(parents=True) + d_file = d_file_dir / "test.d" + + # These paths are relative to BUILD ROOT + d_file.write_text( + "build/foo.o: ../../../../triton_backend/src/model.h ../tensorrt_llm/common/logger.h\n" + ) + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should process both paths + assert len(artifacts) == 2 + + # Find the artifacts + triton_artifact = None + logger_artifact = None + + for artifact in artifacts: + if "triton_backend" in artifact.path: + triton_artifact = artifact + elif "logger.h" in artifact.path: + logger_artifact = artifact + + # Verify triton_backend artifact (non-existent) + assert triton_artifact is not None + assert triton_artifact.type == "header" + assert triton_artifact.context_dir == str(tmp_path) + assert triton_artifact.metadata.get('path_exists') is False + + # Verify logger artifact (exists) + assert logger_artifact is not None + assert logger_artifact.type == "header" + assert logger_artifact.context_dir == str(tmp_path) + assert logger_artifact.metadata.get('path_exists') is True + + # Verify logger resolved to correct absolute path + 
canonical_existing = os.path.realpath(str(existing_header)) + assert logger_artifact.path == canonical_existing + + def test_parse_d_file_prevents_false_positives(self, tmp_path): + """Test that correct path resolution prevents false positives in dependency classification + + This test demonstrates the practical impact of the bug fix: when paths are resolved + correctly from build_dir, dependencies are classified accurately. + """ + # Scenario: A .d file deep in the build tree references a 3rdparty dependency + # OLD BUG: Would resolve from .d file's parent, potentially missing the file + # NEW FIX: Resolves from build root, finds the file correctly + + # Create a 3rdparty dependency structure + third_party_dir = tmp_path / "3rdparty" / "cutlass" / "include" + third_party_dir.mkdir(parents=True) + cutlass_header = third_party_dir / "cutlass.h" + cutlass_header.write_text("// CUTLASS header\n") + + # Create deeply nested .d file (simulating CMake's structure) + d_file_dir = tmp_path / "CMakeFiles" / "tensorrt_llm.dir" / "batch_manager" / "llm_request.cpp.o.d" + d_file_dir.parent.mkdir(parents=True, exist_ok=True) + d_file = d_file_dir + + # .d file contains relative path from BUILD ROOT to cutlass + # OLD BUG: Would try to resolve from .d file's parent → incorrect path + # NEW FIX: Resolves from build root → correct path + d_file.write_text("build/obj.o: 3rdparty/cutlass/include/cutlass.h\n") + + collector = ArtifactCollector(tmp_path) + artifacts = collector._parse_d_file(d_file) + + # Should successfully resolve the path + assert len(artifacts) == 1 + artifact = artifacts[0] + + # Verify correct resolution + assert artifact.metadata.get('path_exists') is True + assert "cutlass.h" in artifact.path + + # Verify it resolved to the actual file + canonical_cutlass = os.path.realpath(str(cutlass_header)) + assert artifact.path == canonical_cutlass + + # This artifact can now be correctly matched to cutlass dependency + # (with correct path resolution, pattern matching will work) + + +# ============================================================================ +# Test PatternMatcher +# ============================================================================ + + +class TestPatternMatcher: + """Test cases for PatternMatcher class""" + + @pytest.fixture + def dependencies_dir(self, tmp_path): + """Create test dependencies directory with YAML files""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create dpkg.yml with system packages + dpkg_data = { + "dependencies": [{ + "name": "libc6", + "version": "2.35", + "description": "GNU C Library: Shared libraries", + "basename_matches": [], + "linker_flags_matches": ["-lpthread"], + "directory_matches": [] + }] + } + with open(deps_dir / "dpkg.yml", 'w') as f: + yaml.dump(dpkg_data, f) + + # Create cuda-cudart-12.yml + cuda_data = { + "name": "cuda-cudart-12", + "version": "12.0", + "description": "NVIDIA CUDA Runtime library version 12", + "basename_matches": ["libcudart.so.12"], + "linker_flags_matches": [], + "directory_matches": ["cuda-12"] + } + with open(deps_dir / "cuda-cudart-12.yml", 'w') as f: + yaml.dump(cuda_data, f) + + # Create pytorch.yml + pytorch_data = { + "name": "pytorch", + "version": "2.0", + "description": "PyTorch machine learning framework", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["pytorch"] + } + with open(deps_dir / "pytorch.yml", 'w') as f: + yaml.dump(pytorch_data, f) + + # Create deepep.yml with bundled binary pattern + deepep_data = { + "name": "deepep", + 
"version": "1.0", + "description": "DeepEP library", + "basename_matches": ["deep_ep_cpp"], + "linker_flags_matches": [], + "directory_matches": [] + } + with open(deps_dir / "deepep.yml", 'w') as f: + yaml.dump(deepep_data, f) + + # Create nlohmann-json.yml + json_data = { + "name": "nlohmann-json", + "version": "3.11", + "description": "JSON for Modern C++", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["json"] + } + with open(deps_dir / "nlohmann-json.yml", 'w') as f: + yaml.dump(json_data, f) + + return deps_dir + + def test_match_exact_library(self, dependencies_dir): + """Test _match_patterns finds exact matches""" + matcher = PatternMatcher(dependencies_dir) + + artifact = Artifact(path="-lpthread", type="library", source="link.txt") + + mapping = matcher._match_patterns(artifact) + + assert mapping is not None + assert mapping.dependency == "libc6" + assert mapping.confidence == "high" + assert mapping.strategy == "exact_pattern_match" + + def test_match_substring_pattern(self, dependencies_dir): + """Test that substring matching is no longer supported (removed for safety)""" + matcher = PatternMatcher(dependencies_dir) + + artifact = Artifact(path="tensorrt_llm/libs/deep_ep_cpp_tllm.so", + type="binary", + source="wheel") + + # Substring matching was removed from _match_patterns to prevent false positives + # This test verifies it returns None for non-exact matches + mapping = matcher._match_patterns(artifact) + + # Should return None since "deep_ep_cpp_tllm.so" doesn't exactly match "deep_ep_cpp" + assert mapping is None + + def test_match_path_alias_rightmost(self, dependencies_dir): + """Test _match_path_alias uses rightmost directory match""" + matcher = PatternMatcher(dependencies_dir) + + artifact = Artifact(path="/build/pytorch/include/torch/torch.h", + type="header", + source="test.d") + + mapping = matcher._match_path_alias(artifact) + + assert mapping is not None + assert mapping.dependency == "pytorch" + assert mapping.metadata['matched_pattern'] == "pytorch" + assert mapping.metadata['matched_sequence'] == "pytorch" + + def test_match_path_multi_directory(self, tmp_path): + """Test _match_path_alias matches multi-directory patterns""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create YAML with multi-directory pattern + dep_data = { + "name": "test-lib", + "description": "Test library with multi-directory pattern", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["foo/bar", "3rdparty/test"] + } + with open(deps_dir / "test-lib.yml", 'w') as f: + yaml.dump(dep_data, f) + + matcher = PatternMatcher(deps_dir) + + # Test: /home/foo/bar/file.h matches "foo/bar" + artifact1 = Artifact(path="/home/foo/bar/file.h", + type="header", + source="test.d") + mapping1 = matcher._match_path_alias(artifact1) + assert mapping1 is not None + assert mapping1.dependency == "test-lib" + assert mapping1.metadata['matched_pattern'] == "foo/bar" + assert mapping1.metadata['matched_sequence'] == "foo/bar" + + # Test: /home/foobar/file.h does NOT match "foo/bar" (no substring matching) + artifact2 = Artifact(path="/home/foobar/file.h", + type="header", + source="test.d") + mapping2 = matcher._match_path_alias(artifact2) + assert mapping2 is None + + # Test: /build/3rdparty/test/include/test.h matches "3rdparty/test" + artifact3 = Artifact(path="/build/3rdparty/test/include/test.h", + type="header", + source="test.d") + mapping3 = matcher._match_path_alias(artifact3) + assert mapping3 is not None + assert 
mapping3.dependency == "test-lib" + assert mapping3.metadata['matched_pattern'] == "3rdparty/test" + + def test_match_path_multi_directory_rightmost(self, tmp_path): + """Test rightmost wins for multi-directory patterns""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + dep_data = { + "name": "test-lib", + "description": "Test library for rightmost matching", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["foo/bar"] + } + with open(deps_dir / "test-lib.yml", 'w') as f: + yaml.dump(dep_data, f) + + matcher = PatternMatcher(deps_dir) + + # Pattern: "foo/bar" appears twice in path + # Path: /foo/bar/baz/foo/bar/qux.h + # Should match at rightmost position (position 3) + artifact = Artifact(path="/foo/bar/baz/foo/bar/qux.h", + type="header", + source="test.d") + mapping = matcher._match_path_alias(artifact) + + assert mapping is not None + assert mapping.dependency == "test-lib" + assert mapping.metadata['matched_pattern'] == "foo/bar" + # Position should be 3 (rightmost occurrence) + assert mapping.metadata['position'] == 3 + + def test_match_path_no_substring_matching(self, tmp_path): + """Test that substring matching is NOT supported""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + dep_data = { + "name": "test-lib", + "description": "Test library for substring verification", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["oo/ba", "o/b"] + } + with open(deps_dir / "test-lib.yml", 'w') as f: + yaml.dump(dep_data, f) + + matcher = PatternMatcher(deps_dir) + + # Pattern: "oo/ba" + # Path: /foo/bar/file.h + # Should NOT match ("oo" != "foo", "ba" != "bar") + artifact1 = Artifact(path="/foo/bar/file.h", + type="header", + source="test.d") + mapping1 = matcher._match_path_alias(artifact1) + assert mapping1 is None + + # Pattern: "o/b" + # Path: /foo/bar/file.h + # Should NOT match + artifact2 = Artifact(path="/foo/bar/file.h", + type="header", + source="test.d") + mapping2 = matcher._match_path_alias(artifact2) + assert mapping2 is None + + def test_match_path_mixed_single_and_multi(self, tmp_path): + """Test single and multi-directory patterns coexist""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create two dependencies: one with single, one with multi-dir patterns + dep1_data = { + "name": "pytorch", + "description": "PyTorch with single component", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["pytorch", "torch"] + } + with open(deps_dir / "pytorch.yml", 'w') as f: + yaml.dump(dep1_data, f) + + dep2_data = { + "name": "cutlass", + "description": "Cutlass with multi-directory", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["3rdparty/cutlass"] + } + with open(deps_dir / "cutlass.yml", 'w') as f: + yaml.dump(dep2_data, f) + + matcher = PatternMatcher(deps_dir) + + # Test single component pattern still works + artifact1 = Artifact(path="/home/pytorch/lib/test.so", + type="library", + source="test") + mapping1 = matcher._match_path_alias(artifact1) + assert mapping1 is not None + assert mapping1.dependency == "pytorch" + + # Test multi-directory pattern works + artifact2 = Artifact(path="/build/3rdparty/cutlass/include/cutlass.h", + type="header", + source="test.d") + mapping2 = matcher._match_path_alias(artifact2) + assert mapping2 is not None + assert mapping2.dependency == "cutlass" + assert mapping2.metadata['matched_sequence'] == "3rdparty/cutlass" + + def test_match_generic_library_fallback(self, 
dependencies_dir): + """Test _match_generic_library as fallback""" + matcher = PatternMatcher(dependencies_dir) + + artifact = Artifact(path="/usr/lib/libunknown.so.1", + type="library", + source="link.txt") + + mapping = matcher._match_generic_library(artifact) + + assert mapping is not None + assert mapping.dependency == "unknown" + assert mapping.confidence == "low" + + def test_match_full_cascade(self, dependencies_dir): + """Test match() tries all strategies in order""" + matcher = PatternMatcher(dependencies_dir) + + # Should match exact library (highest priority) + artifact1 = Artifact(path="-lpthread", type="library", source="test") + mapping1 = matcher.match(artifact1) + assert mapping1 is not None + assert mapping1.strategy == "exact_pattern_match" + + # Should match exact pattern + artifact2 = Artifact(path="/usr/lib/libcudart.so.12", + type="library", + source="test") + mapping2 = matcher.match(artifact2) + assert mapping2 is not None + assert mapping2.strategy == "exact_pattern_match" + + # Should fall back to generic + artifact3 = Artifact(path="/usr/lib/libfallback.so", + type="library", + source="test") + mapping3 = matcher.match(artifact3) + assert mapping3 is not None + assert mapping3.strategy == "generic_library_inference" + + def test_yaml_loading_individual_files(self, tmp_path): + """Test loading individual YAML dependency files""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create individual dependency file + dep_data = { + "name": "test-lib", + "version": "1.0", + "description": "Test library for unit tests", + "basename_matches": ["libtest.so"], + "linker_flags_matches": ["-ltest"], + "directory_matches": ["test-lib"] + } + with open(deps_dir / "test-lib.yml", 'w') as f: + yaml.dump(dep_data, f) + + matcher = PatternMatcher(deps_dir) + + # Check pattern_mappings + assert "libtest.so" in matcher.pattern_mappings + assert matcher.pattern_mappings["libtest.so"] == "test-lib" + assert "-ltest" in matcher.pattern_mappings + assert matcher.pattern_mappings["-ltest"] == "test-lib" + + # Check path_aliases + assert "test-lib" in matcher.path_aliases + assert matcher.path_aliases["test-lib"] == "test-lib" + + def test_yaml_loading_dpkg_format(self, tmp_path): + """Test loading dpkg.yml with dependencies list""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create dpkg.yml with multiple dependencies + dpkg_data = { + "dependencies": [{ + "name": "dep1", + "version": "1.0", + "description": "First dependency", + "basename_matches": ["libdep1.so"], + "linker_flags_matches": [], + "directory_matches": [] + }, { + "name": "dep2", + "version": "2.0", + "description": "Second dependency", + "basename_matches": ["libdep2.so"], + "linker_flags_matches": [], + "directory_matches": [] + }] + } + with open(deps_dir / "dpkg.yml", 'w') as f: + yaml.dump(dpkg_data, f) + + matcher = PatternMatcher(deps_dir) + + # Both dependencies should be loaded + assert "libdep1.so" in matcher.pattern_mappings + assert matcher.pattern_mappings["libdep1.so"] == "dep1" + assert "libdep2.so" in matcher.pattern_mappings + assert matcher.pattern_mappings["libdep2.so"] == "dep2" + + def test_yaml_duplicate_pattern_warning(self, tmp_path, capsys): + """Test that duplicate patterns generate warnings""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create first file with pattern + dep1_data = { + "name": "dep1", + "version": "1.0", + "description": "First dependency with duplicate pattern", + "basename_matches": ["duplicate.so"], + "linker_flags_matches": 
[], + "directory_matches": [] + } + with open(deps_dir / "dep1.yml", 'w') as f: + yaml.dump(dep1_data, f) + + # Create second file with same pattern + dep2_data = { + "name": "dep2", + "version": "2.0", + "description": "Second dependency with duplicate pattern", + "basename_matches": ["duplicate.so"], + "linker_flags_matches": [], + "directory_matches": [] + } + with open(deps_dir / "dep2.yml", 'w') as f: + yaml.dump(dep2_data, f) + + # Initialize matcher (should emit warning) + matcher = PatternMatcher(deps_dir) + + # Check warning was emitted + captured = capsys.readouterr() + assert "Warning: Duplicate basename match 'duplicate.so'" in captured.err + + # Last one wins + assert matcher.pattern_mappings["duplicate.so"] == "dep2" + + def test_yaml_invalid_format_warning(self, tmp_path, capsys): + """Test that invalid YAML format generates warnings""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create file with invalid format (missing name) + with open(deps_dir / "invalid.yml", 'w') as f: + yaml.dump({ + "version": "1.0", + "description": "Missing name field" + }, f) + + # Initialize matcher (should emit warning) + PatternMatcher(deps_dir) + + # Check warning was emitted (either "Missing 'name' field" or "unrecognized format") + captured = capsys.readouterr() + assert ("Warning: Missing 'name' field" in captured.err + or "Warning: Skipping invalid.yml - unrecognized format" + in captured.err) + + def test_yaml_skip_underscore_files(self, tmp_path): + """Test that files starting with underscore are skipped""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create _schema.yml (should be skipped) + schema_data = { + "name": "should-not-load", + "version": "1.0", + "description": "This file should be skipped", + "basename_matches": ["should-not-exist.so"], + "linker_flags_matches": [], + "directory_matches": [] + } + with open(deps_dir / "_schema.yml", 'w') as f: + yaml.dump(schema_data, f) + + # Create normal file (should be loaded) + normal_data = { + "name": "normal-dep", + "version": "1.0", + "description": "Normal dependency file", + "basename_matches": ["normal.so"], + "linker_flags_matches": [], + "directory_matches": [] + } + with open(deps_dir / "normal-dep.yml", 'w') as f: + yaml.dump(normal_data, f) + + matcher = PatternMatcher(deps_dir) + + # Schema file should not be loaded + assert "should-not-exist.so" not in matcher.pattern_mappings + + # Normal file should be loaded + assert "normal.so" in matcher.pattern_mappings + assert matcher.pattern_mappings["normal.so"] == "normal-dep" + + def test_yaml_mixed_loading(self, tmp_path): + """Test loading both dpkg.yml and individual files together""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create dpkg.yml + dpkg_data = { + "dependencies": [{ + "name": "system-dep", + "version": "1.0", + "description": "System dependency from dpkg", + "basename_matches": ["libsystem.so"], + "linker_flags_matches": [], + "directory_matches": [] + }] + } + with open(deps_dir / "dpkg.yml", 'w') as f: + yaml.dump(dpkg_data, f) + + # Create individual file + custom_data = { + "name": "custom-dep", + "version": "2.0", + "description": "Custom dependency from individual file", + "basename_matches": ["libcustom.so"], + "linker_flags_matches": [], + "directory_matches": [] + } + with open(deps_dir / "custom-dep.yml", 'w') as f: + yaml.dump(custom_data, f) + + matcher = PatternMatcher(deps_dir) + + # Both should be loaded + assert "libsystem.so" in matcher.pattern_mappings + assert 
matcher.pattern_mappings["libsystem.so"] == "system-dep" + assert "libcustom.so" in matcher.pattern_mappings + assert matcher.pattern_mappings["libcustom.so"] == "custom-dep" + + def test_yaml_empty_arrays(self, tmp_path): + """Test that empty arrays in YAML are handled correctly""" + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create dependency with empty arrays + dep_data = { + "name": "minimal-dep", + "version": "1.0", + "description": "Minimal dependency with empty arrays", + "basename_matches": [], + "linker_flags_matches": [], + "directory_matches": ["minimal"] + } + with open(deps_dir / "minimal-dep.yml", 'w') as f: + yaml.dump(dep_data, f) + + matcher = PatternMatcher(deps_dir) + + # Should load successfully + assert "minimal" in matcher.path_aliases + assert matcher.path_aliases["minimal"] == "minimal-dep" + + +# ============================================================================ +# Test OutputGenerator +# ============================================================================ + + +class TestOutputGenerator: + """Test cases for OutputGenerator class""" + + def test_generate_creates_files(self, tmp_path): + """Test generate() creates known and unknown YAML files""" + artifacts = [ + Artifact(path="/usr/include/stdio.h", + type="header", + source="test.d"), + Artifact(path="/usr/lib/libfoo.so", + type="library", + source="link.txt"), + Artifact(path="/unknown/header.h", type="header", source="test.d") + ] + + mappings = [ + Mapping(artifact=artifacts[0], + dependency="libc6", + confidence="high", + strategy="dpkg-query"), + Mapping(artifact=artifacts[1], + dependency="foo", + confidence="medium", + strategy="pattern") + ] + + output_dir = tmp_path / "reports" + known_file, unknown_file = OutputGenerator.generate( + mappings, artifacts, output_dir) + + # Check files exist + assert known_file.exists() + assert unknown_file.exists() + + # Check known.yml content (simplified structure: dependencies dict of lists) + with open(known_file) as f: + known_data = yaml.safe_load(f) + + assert known_data['summary']['total_artifacts'] == 3 + assert known_data['summary']['mapped'] == 2 + assert known_data['summary']['unmapped'] == 1 + assert len(known_data['dependencies']) == 2 + # Check dependencies is a dict with lists of paths + assert isinstance(known_data['dependencies'], dict) + assert 'libc6' in known_data['dependencies'] + assert 'foo' in known_data['dependencies'] + assert '/usr/include/stdio.h' in known_data['dependencies']['libc6'] + assert '/usr/lib/libfoo.so' in known_data['dependencies']['foo'] + + # Check unknown.yml content (simplified structure: flat list of paths) + with open(unknown_file) as f: + unknown_data = yaml.safe_load(f) + + assert unknown_data['summary']['count'] == 1 + assert len(unknown_data['artifacts']) == 1 + assert "/unknown/header.h" in unknown_data['artifacts'] + + def test_generate_groups_by_dependency(self, tmp_path): + """Test generate() groups artifacts by dependency""" + artifacts = [ + Artifact(path="/usr/include/stdio.h", + type="header", + source="test1.d"), + Artifact(path="/usr/include/stdlib.h", + type="header", + source="test2.d"), + ] + + mappings = [ + Mapping(artifact=artifacts[0], + dependency="libc6", + confidence="high", + strategy="dpkg"), + Mapping(artifact=artifacts[1], + dependency="libc6", + confidence="high", + strategy="dpkg"), + ] + + output_dir = tmp_path / "reports" + known_file, _ = OutputGenerator.generate(mappings, artifacts, + output_dir) + + with open(known_file) as f: + known_data = 
yaml.safe_load(f) + + # Should have 1 dependency with 2 artifacts (simplified: dict of lists) + assert len(known_data['dependencies']) == 1 + assert 'libc6' in known_data['dependencies'] + assert len(known_data['dependencies']['libc6']) == 2 + assert '/usr/include/stdio.h' in known_data['dependencies']['libc6'] + assert '/usr/include/stdlib.h' in known_data['dependencies']['libc6'] + + def test_generate_coverage_calculation(self, tmp_path): + """Test generate() calculates coverage correctly""" + artifacts = [ + Artifact(path=f"/test{i}.h", type="header", source="test.d") + for i in range(10) + ] + mappings = [ + Mapping(artifact=artifacts[i], + dependency=f"dep{i}", + confidence="high", + strategy="dpkg") for i in range(7) # 7 out of 10 mapped + ] + + output_dir = tmp_path / "reports" + known_file, _ = OutputGenerator.generate(mappings, artifacts, + output_dir) + + with open(known_file) as f: + known_data = yaml.safe_load(f) + + # Verify summary section is still included in YAML output + assert "70.0%" in known_data['summary']['coverage'] + + def test_generate_path_issues_yml_basic(self, tmp_path): + """Test generate() creates path_issues.yml with non-existent headers""" + artifacts = [ + Artifact(path="/usr/include/stdio.h", + type="header", + source="test.d", + metadata={'path_exists': True}), + Artifact(path="/nonexistent/header.h", + type="header", + source="test2.d", + metadata={ + 'path_exists': False, + 'original_path': 'nonexistent/header.h' + }), + Artifact(path="/missing/include.h", + type="header", + source="test3.d", + metadata={ + 'path_exists': False, + 'original_path': 'missing/include.h' + }), + ] + + mappings = [ + Mapping(artifact=artifacts[0], + dependency="libc6", + confidence="high", + strategy="dpkg-query") + ] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Check path_issues.yml exists + path_issues_file = output_dir / "path_issues.yml" + assert path_issues_file.exists() + + # Load and verify content + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + # Should have 2 non-existent paths (not the existing one) + assert path_issues_data['summary']['count'] == 2 + assert path_issues_data['summary']['total_artifacts'] == 3 + assert path_issues_data['summary']['percentage'] == "66.7%" + + # Verify non_existent_paths contains the right entries + non_existent_paths = path_issues_data['non_existent_paths'] + assert len(non_existent_paths) == 2 + + # Check field names + assert all('resolved_path' in entry for entry in non_existent_paths) + assert all('type' in entry for entry in non_existent_paths) + assert all('source' in entry for entry in non_existent_paths) + assert all('d_file_path' in entry for entry in non_existent_paths) + + # Check values + paths = [entry['resolved_path'] for entry in non_existent_paths] + assert "/nonexistent/header.h" in paths + assert "/missing/include.h" in paths + assert "/usr/include/stdio.h" not in paths + + def test_generate_path_issues_yml_excludes_libraries(self, tmp_path): + """Test path_issues.yml excludes library artifacts even if they don't exist""" + artifacts = [ + Artifact(path="/nonexistent/header.h", + type="header", + source="test.d", + metadata={ + 'path_exists': False, + 'original_path': 'nonexistent/header.h' + }), + Artifact(path="-lmissing", + type="library", + source="link.txt", + metadata={'path_exists': False}), + Artifact(path="/missing/libfoo.so", + type="library", + source="link.txt", + metadata={'path_exists': False}), + ] + + mappings = 
[] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + # Should only have 1 entry (the header, not the libraries) + assert path_issues_data['summary']['count'] == 1 + assert len(path_issues_data['non_existent_paths']) == 1 + + # Verify it's the header + entry = path_issues_data['non_existent_paths'][0] + assert entry['resolved_path'] == "/nonexistent/header.h" + assert entry['type'] == "header" + assert entry['d_file_path'] == "nonexistent/header.h" + + def test_generate_path_issues_yml_field_names(self, tmp_path): + """Test path_issues.yml has correct field names (not 'path', but 'resolved_path')""" + artifacts = [ + Artifact(path="/resolved/absolute/path.h", + type="header", + source="build/CMakeFiles/test.d", + metadata={ + 'path_exists': False, + 'original_path': 'relative/path.h' + }), + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + entry = path_issues_data['non_existent_paths'][0] + + # Verify field names + assert 'resolved_path' in entry + assert 'type' in entry + assert 'source' in entry + assert 'd_file_path' in entry + + # Verify it does NOT use 'path' as field name + assert 'path' not in entry + + # Verify values + assert entry['resolved_path'] == "/resolved/absolute/path.h" + assert entry['type'] == "header" + assert entry['source'] == "build/CMakeFiles/test.d" + assert entry['d_file_path'] == "relative/path.h" + + def test_generate_path_issues_yml_percentage_calculation(self, tmp_path): + """Test path_issues.yml calculates percentage correctly""" + # Create 10 artifacts: 3 non-existent headers, 7 existing + artifacts = [] + for i in range(3): + artifacts.append( + Artifact(path=f"/missing{i}.h", + type="header", + source="test.d", + metadata={ + 'path_exists': False, + 'original_path': f'missing{i}.h' + })) + for i in range(7): + artifacts.append( + Artifact(path=f"/exists{i}.h", + type="header", + source="test.d", + metadata={'path_exists': True})) + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + # 3 out of 10 = 30% + assert path_issues_data['summary']['count'] == 3 + assert path_issues_data['summary']['total_artifacts'] == 10 + assert path_issues_data['summary']['percentage'] == "30.0%" + + def test_generate_path_issues_yml_only_includes_path_exists_false( + self, tmp_path): + """Test path_issues.yml only includes artifacts with path_exists=False""" + artifacts = [ + Artifact(path="/exists.h", + type="header", + source="test.d", + metadata={'path_exists': True}), + Artifact(path="/missing.h", + type="header", + source="test.d", + metadata={ + 'path_exists': False, + 'original_path': 'missing.h' + }), + Artifact(path="/no_metadata.h", type="header", + source="test.d"), # No metadata + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + 
path_issues_data = yaml.safe_load(f) + + # Only the one with path_exists=False + assert path_issues_data['summary']['count'] == 1 + assert len(path_issues_data['non_existent_paths']) == 1 + assert path_issues_data['non_existent_paths'][0][ + 'resolved_path'] == "/missing.h" + + def test_generate_path_issues_yml_empty_when_all_exist(self, tmp_path): + """Test path_issues.yml has zero entries when all paths exist""" + artifacts = [ + Artifact(path="/exists1.h", + type="header", + source="test.d", + metadata={'path_exists': True}), + Artifact(path="/exists2.h", + type="header", + source="test.d", + metadata={'path_exists': True}), + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + # Should have 0 entries + assert path_issues_data['summary']['count'] == 0 + assert path_issues_data['summary']['total_artifacts'] == 2 + assert path_issues_data['summary']['percentage'] == "0.0%" + assert len(path_issues_data['non_existent_paths']) == 0 + + def test_generate_path_issues_yml_mixed_artifact_types(self, tmp_path): + """Test path_issues.yml with headers, libraries, and binaries""" + artifacts = [ + Artifact(path="/missing_header.h", + type="header", + source="test.d", + metadata={ + 'path_exists': False, + 'original_path': 'missing_header.h' + }), + Artifact(path="-lmissing", + type="library", + source="link.txt", + metadata={'path_exists': False}), + Artifact(path="/missing_binary.so", + type="binary", + source="wheel", + metadata={ + 'path_exists': False, + 'original_path': 'missing_binary.so' + }), + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + # Should include header and binary, but not library + assert path_issues_data['summary']['count'] == 2 + assert len(path_issues_data['non_existent_paths']) == 2 + + # Verify types + types = { + entry['type'] + for entry in path_issues_data['non_existent_paths'] + } + assert 'header' in types + assert 'binary' in types + assert 'library' not in types + + def test_generate_path_issues_yml_original_path_metadata(self, tmp_path): + """Test path_issues.yml uses original_path metadata for d_file_path field""" + artifacts = [ + Artifact(path="/resolved/absolute/path/include/header.h", + type="header", + source="build/CMakeFiles/target.dir/test.d", + metadata={ + 'path_exists': False, + 'original_path': 'relative/include/header.h' + }), + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + entry = path_issues_data['non_existent_paths'][0] + + # d_file_path should be the original relative path from the .d file + assert entry['d_file_path'] == 'relative/include/header.h' + # resolved_path should be the absolute resolved path + assert entry[ + 'resolved_path'] == "/resolved/absolute/path/include/header.h" + + def test_generate_path_issues_yml_missing_original_path_metadata( + self, tmp_path): + """Test path_issues.yml handles missing original_path metadata gracefully""" + artifacts = [ + 
Artifact(path="/missing.h", + type="header", + source="test.d", + metadata={'path_exists': False}), # No original_path + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + entry = path_issues_data['non_existent_paths'][0] + + # Should have 'N/A' when original_path is missing + assert entry['d_file_path'] == 'N/A' + assert entry['resolved_path'] == "/missing.h" + + def test_generate_path_issues_yml_note_field(self, tmp_path): + """Test path_issues.yml summary contains explanatory note""" + artifacts = [ + Artifact(path="/missing.h", + type="header", + source="test.d", + metadata={ + 'path_exists': False, + 'original_path': 'missing.h' + }), + ] + + mappings = [] + + output_dir = tmp_path / "reports" + OutputGenerator.generate(mappings, artifacts, output_dir) + + # Load path_issues.yml + path_issues_file = output_dir / "path_issues.yml" + with open(path_issues_file) as f: + path_issues_data = yaml.safe_load(f) + + # Verify note field exists and mentions libraries are excluded + assert 'note' in path_issues_data['summary'] + note = path_issues_data['summary']['note'] + assert 'libraries excluded' in note.lower() + assert 'do not exist' in note.lower() + + +# ============================================================================ +# Integration Tests +# ============================================================================ + + +class TestIntegration: + """Integration tests for full workflow""" + + def test_full_workflow(self, tmp_path): + """Test complete scan workflow end-to-end""" + # Setup: Create dependencies directory with YAML files + deps_dir = tmp_path / "dependencies" + deps_dir.mkdir() + + # Create dpkg.yml + dpkg_data = { + "dependencies": [{ + "name": "libc6", + "version": "2.35", + "description": "GNU C Library: Shared libraries", + "basename_matches": [], + "linker_flags_matches": ["-lpthread"], + "directory_matches": [] + }] + } + with open(deps_dir / "dpkg.yml", 'w') as f: + yaml.dump(dpkg_data, f) + + # Setup: Create build artifacts + build_dir = tmp_path / "build" + build_dir.mkdir() + + d_file = build_dir / "test.d" + d_file.write_text("build/foo.o: /usr/include/stdio.h\n") + + link_file = build_dir / "link.txt" + link_file.write_text("/usr/bin/c++ -o foo -lpthread\n") + + # Run workflow + collector = ArtifactCollector(build_dir) + artifacts = collector.collect_all() + + dpkg_resolver = DpkgResolver() + pattern_matcher = PatternMatcher(deps_dir) + + all_mappings = [] + + # Try dpkg first + for artifact in artifacts: + with patch.object(dpkg_resolver, + 'get_package', + return_value='libc6'): + package = dpkg_resolver.get_package(artifact.path) + if package: + all_mappings.append( + Mapping(artifact=artifact, + dependency=package, + confidence='high', + strategy='dpkg-query')) + + # Try patterns for remaining + dpkg_paths = {m.artifact.path for m in all_mappings} + for artifact in artifacts: + if artifact.path not in dpkg_paths: + mapping = pattern_matcher.match(artifact) + if mapping: + all_mappings.append(mapping) + + # Generate reports + output_dir = tmp_path / "reports" + known_file, unknown_file = OutputGenerator.generate( + all_mappings, artifacts, output_dir) + + # Verify outputs + assert known_file.exists() + assert unknown_file.exists() + + with open(known_file) as f: + data = yaml.safe_load(f) + assert 
data['summary']['total_artifacts'] > 0
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
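For reference, the classes exercised by these tests compose into a short end-to-end pipeline: collect artifacts from a build tree, map each artifact to a dependency, and write the YAML reports. The sketch below mirrors `test_full_workflow`; the module name `scan_build_artifacts` and the `metadata/dependencies/` layout are assumptions made for illustration (the import path is not visible in this diff), and dpkg-based resolution is omitted for brevity.

```python
# Illustrative sketch only: module name and directory layout are assumptions.
from pathlib import Path

from scan_build_artifacts import ArtifactCollector, OutputGenerator, PatternMatcher

build_dir = Path("../build")              # build tree containing .d files and link.txt
deps_dir = Path("metadata/dependencies")  # YAML pattern definitions (assumed layout)

# 1. Collect header/library artifacts from the build tree.
artifacts = ArtifactCollector(build_dir).collect_all()

# 2. Map artifacts to dependencies using the YAML patterns; unmatched
#    artifacts are reported in unknown.yml by the output generator.
matcher = PatternMatcher(deps_dir)
mappings = [m for m in (matcher.match(a) for a in artifacts) if m is not None]

# 3. Write known.yml / unknown.yml (and path_issues.yml) under scan_output/.
known_file, unknown_file = OutputGenerator.generate(mappings, artifacts,
                                                    Path("scan_output"))
print(f"known: {known_file}, unknown: {unknown_file}")
```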