From a4c5f46de7188b1077bece81d191db78362134d1 Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Fri, 16 Jan 2026 11:48:44 -0800 Subject: [PATCH 1/6] add json output extension --- docs/_extensions/json_output/README.md | 292 +++++++++++++ docs/_extensions/json_output/__init__.py | 33 ++ docs/_extensions/json_output/config.py | 211 ++++++++++ .../json_output/content/__init__.py | 9 + .../json_output/content/extractor.py | 230 +++++++++++ .../json_output/content/metadata.py | 79 ++++ .../json_output/content/structured.py | 384 ++++++++++++++++++ docs/_extensions/json_output/content/text.py | 357 ++++++++++++++++ docs/_extensions/json_output/core/__init__.py | 17 + docs/_extensions/json_output/core/builder.py | 96 +++++ .../json_output/core/document_discovery.py | 115 ++++++ .../json_output/core/global_metadata.py | 147 +++++++ .../json_output/core/hierarchy_builder.py | 125 ++++++ .../json_output/core/json_formatter.py | 264 ++++++++++++ .../json_output/core/json_writer.py | 88 ++++ .../json_output/processing/__init__.py | 12 + .../json_output/processing/cache.py | 94 +++++ .../json_output/processing/processor.py | 199 +++++++++ docs/_extensions/json_output/utils.py | 122 ++++++ docs/conf.py | 18 +- 20 files changed, 2888 insertions(+), 4 deletions(-) create mode 100644 docs/_extensions/json_output/README.md create mode 100644 docs/_extensions/json_output/__init__.py create mode 100644 docs/_extensions/json_output/config.py create mode 100644 docs/_extensions/json_output/content/__init__.py create mode 100644 docs/_extensions/json_output/content/extractor.py create mode 100644 docs/_extensions/json_output/content/metadata.py create mode 100644 docs/_extensions/json_output/content/structured.py create mode 100644 docs/_extensions/json_output/content/text.py create mode 100644 docs/_extensions/json_output/core/__init__.py create mode 100644 docs/_extensions/json_output/core/builder.py create mode 100644 docs/_extensions/json_output/core/document_discovery.py create mode 
100644 docs/_extensions/json_output/core/global_metadata.py create mode 100644 docs/_extensions/json_output/core/hierarchy_builder.py create mode 100644 docs/_extensions/json_output/core/json_formatter.py create mode 100644 docs/_extensions/json_output/core/json_writer.py create mode 100644 docs/_extensions/json_output/processing/__init__.py create mode 100644 docs/_extensions/json_output/processing/cache.py create mode 100644 docs/_extensions/json_output/processing/processor.py create mode 100644 docs/_extensions/json_output/utils.py diff --git a/docs/_extensions/json_output/README.md b/docs/_extensions/json_output/README.md new file mode 100644 index 0000000000..118021d22b --- /dev/null +++ b/docs/_extensions/json_output/README.md @@ -0,0 +1,292 @@ +# JSON Output Extension + +Sphinx extension to generate JSON output for every page alongside HTML output. + +Similar to Hugo's output formats, this creates parallel JSON files for each document +containing metadata, content, and other structured data that can be consumed by +search engines, APIs, or other applications. + +The main use case is generating comprehensive search indexes for tools like Solr, +Lunr.js, or custom search implementations. 
+ +## Search Index Integration + +The main index.json file contains all documents with full content, perfect for: + +- **Lunr.js**: Load index.json and build search index from documents +- **Solr**: POST the JSON data to Solr's update endpoint +- **Elasticsearch**: Bulk index the documents array +- **Custom search**: Parse JSON and implement your own search logic + +## Enhanced JSON Structure + +The JSON structure includes search-optimized fields and global metadata from `conf.py`: + +```json +{ + "id": "guide/installation", + "title": "Installation Guide", + "url": "/guide/installation.html", + "last_modified": "2025-01-15T10:30:00Z", + + "book": { + "title": "NVIDIA NeMo Curator User Guide", + "version": "25.09" + }, + "product": { + "name": "NeMo Curator", + "family": ["NeMo"], + "version": "25.09" + }, + "site": { + "name": "NVIDIA Technical Documentation" + }, + + "content": "Full markdown content here...", + "content_length": 5420, + "word_count": 850, + "format": "text", + "summary": "Quick summary for previews...", + "doc_type": "tutorial", + "section_path": ["Guide", "Installation"], + "headings": [ + {"text": "Prerequisites", "level": 2, "id": "prerequisites"} + ], + "headings_text": "Prerequisites Installation Steps Troubleshooting", + "keywords": ["install", "setup", "prerequisites", "docker", "python"], + "code_blocks": [ + {"content": "pip install package", "language": "bash"} + ], + "links": [ + { + "text": "Configuration Guide", + "url": "/guide/config.html", + "type": "cross_reference", + "ref_type": "doc", + "target_doc": "guide/config" + }, + { + "text": "GitHub Repository", + "url": "https://github.com/NVIDIA/...", + "type": "external" + } + ], + "tags": ["setup", "guide"], + "categories": ["tutorials"] +} +``` + +## Configuration Examples + +### Minimal Configuration (Recommended) + +Uses optimized defaults for best performance: + +```python +# conf.py +json_output_settings = { + 'enabled': True, # All other settings use performance-optimized 
defaults +} +``` + +### Comprehensive Search Index (Default Behavior) + +```python +json_output_settings = { + 'enabled': True, + 'verbose': True, # Default: detailed logging + 'parallel': True, # Default: parallel processing + 'main_index_mode': 'full', # Default: full content + 'max_main_index_docs': 0, # Default: no limit + 'minify_json': True, # Default: smaller files + 'filter_search_clutter': True, # Default: clean content +} +``` + +### Large Sites Configuration + +```python +json_output_settings = { + 'enabled': True, + 'max_main_index_docs': 500, # Limit to 500 documents + 'content_max_length': 20000, # Limit content length + 'skip_large_files': 50000, # Skip files over 50KB +} +``` + +### Fastest Builds (Minimal Features) + +```python +json_output_settings = { + 'enabled': True, + 'main_index_mode': 'metadata_only', # Only titles, descriptions, tags + 'lazy_extraction': True, # Skip keywords, links, code_blocks, images + 'skip_complex_parsing': True, # Skip complex parsing features +} +``` + +## Available Settings + +### Core Settings + +- **enabled** (bool): Enable/disable JSON output generation. Default: `True` +- **verbose** (bool): Enable verbose logging. Default: `True` +- **parallel** (bool): Enable parallel processing. Default: `True` +- **exclude_patterns** (list): Patterns to exclude from JSON generation. Default: `['_build', '_templates', '_static']` +- **include_children** (bool): Include child documents in directory indexes. Default: `True` +- **include_child_content** (bool): Include full content in child documents. Default: `True` +- **main_index_mode** (str): How to handle main index page. Options: `'disabled'`, `'metadata_only'`, `'full'`. Default: `'full'` +- **max_main_index_docs** (int): Maximum documents to include in main index (0 = no limit). Default: `0` + +### Search Optimization Features + +- **extract_code_blocks** (bool): Include code blocks in search data. 
Default: `True` +- **extract_links** (bool): Include internal/external links. Default: `True` +- **extract_images** (bool): Include image references. Default: `True` +- **extract_keywords** (bool): Auto-extract technical keywords (frontmatter `keywords` field takes priority). Default: `True` +- **include_doc_type** (bool): Auto-detect document types (tutorial, guide, reference, etc.). Default: `True` +- **include_section_path** (bool): Include hierarchical section paths. Default: `True` + +### Link Extraction Options + +- **link_normalization** (bool): Normalize internal URLs to absolute paths with `.html` extension. Default: `True` +- **link_include_ref_type** (bool): Include `ref_type` metadata (ref, doc, any, etc.) for cross-references. Default: `True` +- **link_include_target_doc** (bool): Include `target_doc` for cross-references (enables document relationship mapping). Default: `True` +- **link_resolve_titles** (bool): Resolve filename-like link text (e.g., "index") to document titles (e.g., "Getting Started"). Default: `True` + +### Performance Controls + +- **content_max_length** (int): Max content length per document (0 = no limit). Default: `50000` +- **summary_max_length** (int): Max summary length. Default: `500` +- **keywords_max_count** (int): Max keywords per document. Default: `50` + +### Output Format Options + +- **minify_json** (bool): Minify JSON output (removes indentation for smaller files). Default: `True` +- **separate_content** (bool): Store content in separate .content.json files for better performance. Default: `False` + +### Speed Optimizations + +- **parallel_workers** (str): Number of parallel workers. Default: `'auto'` +- **batch_size** (int): Process documents in batches. Default: `50` +- **cache_aggressive** (bool): Enable aggressive caching. Default: `True` +- **lazy_extraction** (bool): Skip feature extraction (keywords, links, code_blocks, images) for faster builds. 
Default: `False` +- **skip_large_files** (int): Skip files larger than N bytes. Default: `100000` +- **incremental_build** (bool): Only process changed files. Default: `True` +- **memory_limit_mb** (int): Memory limit per worker. Default: `512` +- **fast_text_extraction** (bool): Use faster text extraction. Default: `True` +- **skip_complex_parsing** (bool): Skip complex parsing features. Default: `False` + +### Content Filtering + +- **filter_search_clutter** (bool): Remove SVG, toctree, and other non-searchable content. Default: `True` + +### Global Metadata + +- **global_metadata** (dict): User-defined global fields injected into all JSON files. Default: `{}` +- **infer_global_metadata** (bool): Auto-infer book/product/site from Sphinx config. Default: `True` + +## Global Metadata from conf.py + +The extension can inject site-wide metadata from `conf.py` into every JSON file, providing consistent book/product/site context without requiring frontmatter on each page. + +### Auto-Inference (Default) + +By default, the extension auto-infers global metadata from standard Sphinx configuration: + +| JSON Field | Source | +|------------|--------| +| `book.title` | `project` | +| `book.version` | `release` | +| `product.name` | Extracted from `project` (strips "NVIDIA" prefix and doc suffixes) | +| `product.version` | `release` | +| `product.family` | `html_context["product_family"]` (if set) | +| `site.name` | `html_context["site_name"]` (if set) | + +### Explicit Configuration + +For full control, provide explicit `global_metadata`: + +```python +# conf.py +project = "NVIDIA NeMo Curator User Guide" +release = "25.09" + +json_output_settings = { + "enabled": True, + "global_metadata": { + "book": { + "title": project, + "version": release, + }, + "product": { + "name": "NeMo Curator", + "family": ["NeMo"], + "version": release, + }, + "site": { + "name": "NVIDIA Technical Documentation", + }, + }, +} +``` + +### Using html_context for Inference + +You can also set 
values via `html_context` for auto-inference: + +```python +# conf.py +project = "NVIDIA NeMo Curator User Guide" +release = "25.09" + +html_context = { + "product_name": "NeMo Curator", + "product_family": ["NeMo"], + "site_name": "NVIDIA Technical Documentation", +} + +json_output_settings = { + "enabled": True, + "infer_global_metadata": True, # Default +} +``` + +### Disabling Global Metadata + +To disable global metadata entirely: + +```python +json_output_settings = { + "enabled": True, + "infer_global_metadata": False, + "global_metadata": {}, +} +``` + +## Content Gating Integration + +This extension automatically respects content gating rules set by the content_gating extension at multiple levels: + +### Document-Level Gating +Documents with 'only' conditions in frontmatter that fail evaluation (e.g., 'only: not ga' when building with -t ga) will be excluded from JSON generation entirely, ensuring sensitive content doesn't leak into search indexes. + +### Content-Level Gating +Content sections wrapped in `{conditional}` directives are also properly filtered. When conditions don't match, the content is excluded from the document tree and won't appear in the generated JSON. + +### Integration Details +- **Automatic Detection**: Detects if content_gating extension is loaded +- **Exclude Pattern Sync**: Respects documents added to exclude_patterns by content gating +- **Build Tag Awareness**: Logs current build tags for debugging +- **Debug Logging**: Provides detailed logs when content gating rules are applied + +The integration works seamlessly - just enable both extensions and your JSON output will automatically respect all content gating rules without additional configuration. + +## Performance Tips + +1. **Enable parallel processing** for faster builds on multi-core systems +2. **Use incremental builds** to only process changed files +3. **Set content length limits** for large documentation sites +4. 
**Enable content filtering** to reduce JSON file sizes +5. **Use batch processing** to control memory usage +6. **Skip large files** to avoid processing massive documents \ No newline at end of file diff --git a/docs/_extensions/json_output/__init__.py b/docs/_extensions/json_output/__init__.py new file mode 100644 index 0000000000..1a821cbdc2 --- /dev/null +++ b/docs/_extensions/json_output/__init__.py @@ -0,0 +1,33 @@ +""" +Sphinx extension to generate JSON output for every page alongside HTML output. + +This extension creates parallel JSON files for each document containing metadata, +content, and other structured data that can be consumed by search engines, APIs, +or other applications. + +See README.md for detailed configuration options and usage examples. +""" + +from typing import Any + +from sphinx.application import Sphinx + +from .config import get_default_settings, validate_config +from .processing import on_build_finished + + +def setup(app: Sphinx) -> dict[str, Any]: + """Setup function for Sphinx extension.""" + # Add configuration with default settings + default_settings = get_default_settings() + app.add_config_value("json_output_settings", default_settings, "html") + + # Connect to build events + app.connect("config-inited", validate_config) + app.connect("build-finished", on_build_finished) + + return { + "version": "1.0.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/docs/_extensions/json_output/config.py b/docs/_extensions/json_output/config.py new file mode 100644 index 0000000000..4f9a102e6f --- /dev/null +++ b/docs/_extensions/json_output/config.py @@ -0,0 +1,211 @@ +"""Configuration management for JSON output extension.""" + +from typing import Any + +from sphinx.application import Sphinx +from sphinx.config import Config +from sphinx.util import logging + +logger = logging.getLogger(__name__) + +# Constants +MAX_PARALLEL_WORKERS = 32 + + +def get_default_settings() -> dict[str, Any]: + """Get default 
configuration settings for json_output extension.""" + return { + "enabled": True, + "exclude_patterns": ["_build", "_templates", "_static"], + "verbose": True, # Enable by default for better user feedback + "parallel": True, # Enable parallel processing by default for speed + "include_children": True, + "include_child_content": True, + "main_index_mode": "full", # 'disabled', 'metadata_only', 'full' + "max_main_index_docs": 0, # No limit by default for comprehensive search + # Search optimization features + "extract_code_blocks": True, # Include code blocks in search data + "extract_links": True, # Include internal/external links + "extract_images": True, # Include image references + "extract_keywords": True, # Auto-extract technical keywords + "include_doc_type": True, # Auto-detect document types + "include_section_path": True, # Include hierarchical section paths + # Link extraction options + "link_normalization": True, # Normalize internal URLs to absolute paths + "link_include_ref_type": True, # Include ref_type metadata (ref, doc, etc.) + "link_include_target_doc": True, # Include target_doc for cross-references + "link_resolve_titles": True, # Resolve filename-like link text to document titles + # Performance controls + "content_max_length": 50000, # Max content length per document (0 = no limit) + "summary_max_length": 500, # Max summary length + "keywords_max_count": 50, # Max keywords per document + # Output format options + "minify_json": True, # Minify JSON by default for better performance + "separate_content": False, # Store content in separate .content.json files + # Speed optimizations + "parallel_workers": "auto", # Number of parallel workers + "batch_size": 50, # Process documents in batches + "cache_aggressive": True, # Enable aggressive caching + "lazy_extraction": False, # Skip feature extraction (keywords, links, etc.) 
for faster builds + "skip_large_files": 100000, # Skip files larger than N bytes + "incremental_build": True, # Only process changed files + "memory_limit_mb": 512, # Memory limit per worker + "fast_text_extraction": True, # Use faster text extraction + "skip_complex_parsing": False, # Skip complex parsing features + # Content filtering + "filter_search_clutter": True, # Remove SVG, toctree, and other non-searchable content + # Global metadata from conf.py + "global_metadata": {}, # User-defined global fields (book, product, site) + "infer_global_metadata": True, # Auto-infer from Sphinx config (project, release) + } + + +def apply_config_defaults(settings: dict[str, Any]) -> dict[str, Any]: + """Apply default values to settings dictionary.""" + defaults = get_default_settings() + + for key, default_value in defaults.items(): + if key not in settings: + settings[key] = default_value + + return settings + + +def validate_config(_app: Sphinx, config: Config) -> None: + """Validate configuration values.""" + settings = _ensure_settings_dict(config) + settings = apply_config_defaults(settings) + config.json_output_settings = settings + + _validate_core_settings(settings) + _validate_content_limits(settings) + _validate_boolean_settings(settings) + _validate_integer_settings(settings) + _validate_parallel_workers(settings) + _validate_global_metadata(settings) + + +def _ensure_settings_dict(config: Config) -> dict[str, Any]: + """Ensure settings is a valid dictionary.""" + settings = getattr(config, "json_output_settings", {}) + if not isinstance(settings, dict): + logger.warning("json_output_settings must be a dictionary. 
Using defaults.") + settings = {} + config.json_output_settings = settings + return settings + + +def _validate_core_settings(settings: dict[str, Any]) -> None: + """Validate core configuration settings.""" + # Validate main index mode + valid_modes = ["disabled", "metadata_only", "full"] + mode = settings.get("main_index_mode", "full") + if mode not in valid_modes: + logger.warning(f"Invalid main_index_mode '{mode}'. Using 'full'. Valid options: {valid_modes}") + settings["main_index_mode"] = "full" + + # Validate exclude patterns + patterns = settings.get("exclude_patterns", []) + if not isinstance(patterns, list): + logger.warning("exclude_patterns must be a list. Using default.") + settings["exclude_patterns"] = ["_build", "_templates", "_static"] + + +def _validate_content_limits(settings: dict[str, Any]) -> None: + """Validate content-related limit settings.""" + limit_settings = { + "max_main_index_docs": (0, "0 (no limit)"), + "content_max_length": (50000, "50000 (0 = no limit)"), + "summary_max_length": (500, "500"), + "keywords_max_count": (50, "50"), + } + + for setting, (default_val, description) in limit_settings.items(): + value = settings.get(setting, default_val) + if not isinstance(value, int) or value < 0: + logger.warning(f"Invalid {setting} '{value}'. 
Using {description}.") + settings[setting] = default_val + + +def _validate_boolean_settings(settings: dict[str, Any]) -> None: + """Validate boolean configuration settings.""" + bool_settings = [ + "enabled", + "verbose", + "parallel", + "include_children", + "include_child_content", + "extract_code_blocks", + "extract_links", + "extract_images", + "extract_keywords", + "include_doc_type", + "include_section_path", + "link_normalization", + "link_include_ref_type", + "link_include_target_doc", + "link_resolve_titles", + "minify_json", + "separate_content", + "cache_aggressive", + "lazy_extraction", + "incremental_build", + "fast_text_extraction", + "skip_complex_parsing", + "filter_search_clutter", + "infer_global_metadata", + ] + + defaults = get_default_settings() + for setting in bool_settings: + if setting in settings and not isinstance(settings.get(setting), bool): + logger.warning(f"Setting '{setting}' must be boolean. Using default.") + settings[setting] = defaults[setting] + + +def _validate_integer_settings(settings: dict[str, Any]) -> None: + """Validate integer configuration settings with ranges.""" + int_settings = { + "batch_size": (1, 1000), # min, max + "skip_large_files": (0, None), # 0 = disabled + "memory_limit_mb": (64, 8192), # reasonable memory limits + } + + defaults = get_default_settings() + for setting, (min_val, max_val) in int_settings.items(): + if setting in settings: + value = settings[setting] + if not isinstance(value, int) or value < min_val or (max_val and value > max_val): + logger.warning( + f"Setting '{setting}' must be integer between {min_val} and {max_val or 'unlimited'}. Using default." 
+ ) + settings[setting] = defaults[setting] + + +def _validate_parallel_workers(settings: dict[str, Any]) -> None: + """Validate parallel_workers setting (can be 'auto' or integer).""" + if "parallel_workers" in settings: + value = settings["parallel_workers"] + if value != "auto" and (not isinstance(value, int) or value < 1 or value > MAX_PARALLEL_WORKERS): + logger.warning( + f"Setting 'parallel_workers' must be 'auto' or integer between 1 and {MAX_PARALLEL_WORKERS}. Using default." + ) + defaults = get_default_settings() + settings["parallel_workers"] = defaults["parallel_workers"] + + +def _validate_global_metadata(settings: dict[str, Any]) -> None: + """Validate global_metadata setting structure.""" + global_metadata = settings.get("global_metadata", {}) + + if not isinstance(global_metadata, dict): + logger.warning("global_metadata must be a dictionary. Using empty default.") + settings["global_metadata"] = {} + return + + # Validate known top-level keys have dict values + valid_sections = ["book", "product", "site"] + for section in valid_sections: + if section in global_metadata and not isinstance(global_metadata[section], dict): + logger.warning(f"global_metadata.{section} must be a dictionary. 
Removing invalid value.") + del global_metadata[section] diff --git a/docs/_extensions/json_output/content/__init__.py b/docs/_extensions/json_output/content/__init__.py new file mode 100644 index 0000000000..0eee0b4920 --- /dev/null +++ b/docs/_extensions/json_output/content/__init__.py @@ -0,0 +1,9 @@ +"""Content extraction functions for JSON output.""" + +from .extractor import extract_document_content +from .metadata import extract_document_metadata + +__all__ = [ + "extract_document_content", + "extract_document_metadata", +] diff --git a/docs/_extensions/json_output/content/extractor.py b/docs/_extensions/json_output/content/extractor.py new file mode 100644 index 0000000000..d8e88f03e3 --- /dev/null +++ b/docs/_extensions/json_output/content/extractor.py @@ -0,0 +1,230 @@ +"""Main content extraction orchestration.""" + +from typing import Any + +from docutils import nodes +from sphinx.environment import BuildEnvironment +from sphinx.util import logging + +from .structured import extract_code_blocks, extract_headings, extract_images, extract_links +from .text import ( + clean_text_for_llm, + extract_clean_text_content, + extract_keywords, + extract_raw_markdown, + extract_summary, + extract_text_content, +) + +logger = logging.getLogger(__name__) + + +def extract_document_content(env: BuildEnvironment, docname: str, content_cache: dict) -> dict[str, Any]: + """Extract content from document optimized for LLM/search use cases.""" + if docname in content_cache: + return content_cache[docname] + + try: + logger.debug(f"Starting content extraction for {docname}") + doctree = env.get_doctree(docname) + + # Get extraction settings + extraction_settings = _get_extraction_settings(env) + + # Extract main content + content = _extract_main_content(doctree, env, docname, extraction_settings) + + # Extract additional features based on settings (pass env for link resolution) + _extract_additional_features(content, doctree, docname, extraction_settings, env) + + # Cache and 
return result + content_cache[docname] = content + logger.debug(f"Successfully extracted content for {docname}") + + except Exception: + logger.exception(f"Critical error extracting content from {docname}") + content = _get_empty_content_dict() + content_cache[docname] = content + + return content_cache[docname] + + +def _get_extraction_settings(env: BuildEnvironment) -> dict[str, bool]: + """Extract all extraction-related settings from environment config.""" + config = getattr(env.app, "config", None) + json_settings = getattr(config, "json_output_settings", {}) if config else {} + + return { + "fast_extraction": json_settings.get("fast_text_extraction", False), + "lazy_extraction": json_settings.get("lazy_extraction", False), + "skip_complex": json_settings.get("skip_complex_parsing", False), + "filter_clutter": json_settings.get("filter_search_clutter", True), + } + + +def _extract_main_content( + doctree: nodes.document, env: BuildEnvironment, docname: str, settings: dict[str, bool] +) -> dict[str, Any]: + """Extract main text content with appropriate strategy.""" + content = {} + + try: + if settings["fast_extraction"]: + content["content"] = extract_text_content(doctree) + content["format"] = "text" + logger.debug(f"Fast text extraction for {docname}: {len(content['content'])} chars") + else: + content = _extract_with_fallbacks(doctree, env, docname) + + # Apply content filtering if enabled + if settings["filter_clutter"] and content.get("content"): + _apply_content_filtering(content, docname) + + except Exception as e: # noqa: BLE001 + logger.warning(f"Error extracting main content from {docname}: {e}") + content = {"content": "", "format": "text"} + + return content + + +def _extract_with_fallbacks(doctree: nodes.document, env: BuildEnvironment, docname: str) -> dict[str, Any]: + """Extract content with multiple fallback strategies.""" + # Try clean text first (pass env for link title resolution) + clean_text = extract_clean_text_content(doctree, env) + if 
clean_text: + logger.debug(f"Extracted clean text content for {docname}: {len(clean_text)} chars") + return {"content": clean_text, "format": "text"} + + # Fallback to raw markdown + raw_markdown = extract_raw_markdown(env, docname) + if raw_markdown: + logger.debug(f"Fallback to raw markdown for {docname}: {len(raw_markdown)} chars") + return {"content": raw_markdown, "format": "markdown"} + + # Final fallback to basic text + logger.debug(f"Fallback to basic text extraction for {docname}") + return {"content": extract_text_content(doctree), "format": "text"} + + +def _apply_content_filtering(content: dict[str, Any], docname: str) -> None: + """Apply content filtering to remove clutter.""" + original_length = len(content["content"]) + content["content"] = clean_text_for_llm(content["content"]) + filtered_length = len(content["content"]) + + if original_length != filtered_length: + logger.debug(f"Content filtering for {docname}: {original_length} -> {filtered_length} chars") + + +def _extract_additional_features( + content: dict[str, Any], + doctree: nodes.document, + docname: str, + settings: dict[str, bool], + env: BuildEnvironment | None = None, +) -> None: + """Extract additional features based on extraction settings.""" + if settings["lazy_extraction"]: + _set_empty_additional_features(content) + return + + # Extract basic features + _extract_basic_features(content, doctree, docname) + + # Extract complex features if not skipped + if not settings["skip_complex"]: + _extract_complex_features(content, doctree, docname, env) + else: + _set_empty_complex_features(content) + + # Extract keywords if not lazy + if not settings["lazy_extraction"]: + _extract_keywords_feature(content, docname) + else: + content["keywords"] = [] + + +def _extract_basic_features(content: dict[str, Any], doctree: nodes.document, docname: str) -> None: + """Extract basic features: headings and summary.""" + features = [ + ("headings", extract_headings, []), + ("summary", extract_summary, 
""), + ] + + for feature_name, extract_func, default_value in features: + try: + result = extract_func(doctree) + content[feature_name] = result + if feature_name == "headings": + logger.debug(f"Extracted {len(result)} headings from {docname}") + except Exception as e: # noqa: BLE001, PERF203 + logger.warning(f"Error extracting {feature_name} from {docname}: {e}") + content[feature_name] = default_value + + +def _extract_complex_features( + content: dict[str, Any], + doctree: nodes.document, + docname: str, + env: BuildEnvironment | None = None, +) -> None: + """Extract complex features: code blocks, links, and images.""" + # Code blocks and images don't need env + simple_features = [ + ("code_blocks", extract_code_blocks), + ("images", extract_images), + ] + + for feature_name, extract_func in simple_features: + try: + result = extract_func(doctree) + content[feature_name] = result + logger.debug(f"Extracted {len(result)} {feature_name} from {docname}") + except Exception as e: # noqa: BLE001, PERF203 + logger.warning(f"Error extracting {feature_name} from {docname}: {e}") + content[feature_name] = [] + + # Links need env for title resolution + try: + content["links"] = extract_links(doctree, env, docname) + logger.debug(f"Extracted {len(content['links'])} links from {docname}") + except Exception as e: # noqa: BLE001 + logger.warning(f"Error extracting links from {docname}: {e}") + content["links"] = [] + + +def _extract_keywords_feature(content: dict[str, Any], docname: str) -> None: + """Extract keywords from content and headings.""" + try: + content["keywords"] = extract_keywords(content.get("content", ""), content.get("headings", [])) + logger.debug(f"Extracted {len(content['keywords'])} keywords from {docname}") + except Exception as e: # noqa: BLE001 + logger.warning(f"Error extracting keywords from {docname}: {e}") + content["keywords"] = [] + + +def _set_empty_additional_features(content: dict[str, Any]) -> None: + """Set empty values for all additional 
features (lazy extraction).""" + features = ["headings", "summary", "code_blocks", "links", "images", "keywords"] + for feature in features: + content[feature] = [] if feature != "summary" else "" + + +def _set_empty_complex_features(content: dict[str, Any]) -> None: + """Set empty values for complex features only.""" + for feature in ["code_blocks", "links", "images"]: + content[feature] = [] + + +def _get_empty_content_dict() -> dict[str, Any]: + """Get empty content dictionary for error cases.""" + return { + "content": "", + "format": "text", + "headings": [], + "summary": "", + "code_blocks": [], + "links": [], + "images": [], + "keywords": [], + } diff --git a/docs/_extensions/json_output/content/metadata.py b/docs/_extensions/json_output/content/metadata.py new file mode 100644 index 0000000000..cb21a6052f --- /dev/null +++ b/docs/_extensions/json_output/content/metadata.py @@ -0,0 +1,79 @@ +"""Metadata and frontmatter extraction functions.""" + +from typing import Any + +from sphinx.environment import BuildEnvironment +from sphinx.util import logging + +# Import YAML at module level with error handling +try: + import yaml + + YAML_AVAILABLE = True +except ImportError: + YAML_AVAILABLE = False + yaml = None + +logger = logging.getLogger(__name__) + + +def extract_document_metadata( + env: BuildEnvironment, docname: str, metadata_cache: dict, frontmatter_cache: dict +) -> dict[str, Any]: + """Extract metadata from document with caching.""" + if docname in metadata_cache: + return metadata_cache[docname] + + metadata = {} + + try: + if hasattr(env, "metadata") and docname in env.metadata: + metadata.update(env.metadata[docname]) + + source_path = env.doc2path(docname) + if source_path and str(source_path).endswith(".md"): + frontmatter = extract_frontmatter(str(source_path), frontmatter_cache) + if frontmatter: + metadata.update(frontmatter) + + metadata_cache[docname] = metadata + logger.debug(f"Successfully extracted metadata for {docname}: {len(metadata)} 
items") + + except Exception as e: # noqa: BLE001 + logger.warning(f"Error extracting metadata from {docname}: {e}") + metadata_cache[docname] = {} + + return metadata_cache[docname] + + +def extract_frontmatter(file_path: str, frontmatter_cache: dict) -> dict[str, Any] | None: + """Extract YAML frontmatter from markdown files.""" + if file_path in frontmatter_cache: + return frontmatter_cache[file_path] + + result = None + + # Check prerequisites + if not YAML_AVAILABLE: + logger.debug("PyYAML not available, skipping frontmatter extraction") + else: + try: + with open(file_path, encoding="utf-8") as f: + content = f.read() + + # Check for valid frontmatter format + if content.startswith("---"): + end_marker = content.find("\n---\n", 3) + if end_marker != -1: + frontmatter_text = content[3:end_marker] + result = yaml.safe_load(frontmatter_text) + + except yaml.YAMLError as e: + logger.warning(f"YAML parsing error in frontmatter for {file_path}: {e}") + result = None + except Exception as e: # noqa: BLE001 + logger.debug(f"Could not extract frontmatter from {file_path}: {e}") + result = None + + frontmatter_cache[file_path] = result + return result diff --git a/docs/_extensions/json_output/content/structured.py b/docs/_extensions/json_output/content/structured.py new file mode 100644 index 0000000000..0742c324aa --- /dev/null +++ b/docs/_extensions/json_output/content/structured.py @@ -0,0 +1,384 @@ +"""Structured content extraction functions for headings, code blocks, links, and images.""" + +import re +from typing import TYPE_CHECKING, Any + +from docutils import nodes +from sphinx import addnodes +from sphinx.util import logging + +if TYPE_CHECKING: + from sphinx.environment import BuildEnvironment + +logger = logging.getLogger(__name__) + + +def extract_headings(doctree: nodes.document) -> list[dict[str, Any]]: + """Extract headings from document tree.""" + headings = [] + + # Extract headings from section nodes + for node in doctree.traverse(nodes.section): + # 
def extract_code_blocks(doctree: nodes.document) -> list[dict[str, Any]]:
    """Extract code blocks (content + best-guess language) from the document tree."""
    known_languages = {
        "python", "bash", "javascript", "json", "yaml", "sql",
        "html", "css", "cpp", "c", "java", "rust", "go",
    }
    code_blocks: list[dict[str, Any]] = []

    for node in doctree.traverse(nodes.literal_block):
        code_content = node.astext().strip()
        if not code_content:
            continue

        language = "text"  # default when nothing better is found
        if hasattr(node, "attributes"):
            attrs = node.attributes
            # First source: CSS classes ("language-xyz" or a bare known name).
            for cls in attrs.get("classes", []):
                if cls.startswith("language-"):
                    language = cls[len("language-"):]
                    break
                if cls in known_languages:
                    language = cls
                    break
            # Second source: explicit highlight language (wins if present).
            highlight_args = attrs.get("highlight_args")
            if highlight_args and "language" in highlight_args:
                language = highlight_args["language"]

        code_blocks.append({"content": code_content, "language": language})

    return code_blocks
"highlight_args" in node.attributes: + highlight_args = node.attributes["highlight_args"] + if "language" in highlight_args: + language = highlight_args["language"] + + code_blocks.append({"content": code_content, "language": language}) + + return code_blocks + + +def extract_links( + doctree: nodes.document, + env: "BuildEnvironment | None" = None, + docname: str = "", +) -> list[dict[str, Any]]: + """Extract links from document tree with enhanced metadata. + + Args: + doctree: The document tree to extract links from + env: Optional Sphinx build environment for title resolution + docname: Current document name for relative URL resolution + + Returns: + List of link dictionaries with text, url, type, and optional metadata + """ + links = [] + + # Extract standard reference nodes + for node in doctree.traverse(nodes.reference): + link = _extract_reference_node(node, env, docname) + if link: + links.append(link) + + # Extract download reference nodes + for node in doctree.traverse(addnodes.download_reference): + link = _extract_download_reference(node) + if link: + links.append(link) + + return links + + +def _extract_reference_node( + node: nodes.reference, + env: "BuildEnvironment | None", + current_docname: str, +) -> dict[str, Any] | None: + """Extract metadata from a reference node.""" + link_text = node.astext().strip() + if not link_text: + return None + + attrs = getattr(node, "attributes", {}) + link: dict[str, Any] = {"text": link_text, "type": "internal"} + + # Extract URL from various attributes + if "refuri" in attrs: + link["url"] = attrs["refuri"] + # Classify link type + if attrs["refuri"].startswith(("http://", "https://", "ftp://", "mailto:")): + link["type"] = "external" + elif attrs["refuri"].startswith("#"): + link["type"] = "anchor" + else: + link["type"] = "internal" + # Normalize internal URLs + link["url"] = _normalize_internal_url(attrs["refuri"], current_docname) + elif "refid" in attrs: + link["url"] = f"#{attrs['refid']}" + link["type"] = 
"anchor" + elif "reftarget" in attrs: + link["url"] = attrs["reftarget"] + link["type"] = "internal" + + # Extract cross-reference metadata (from :ref:, :doc:, {ref}, {doc}, etc.) + if "refdoc" in attrs: + link["target_doc"] = attrs["refdoc"] + if link["type"] == "internal": + link["type"] = "cross_reference" + + if "reftype" in attrs: + link["ref_type"] = attrs["reftype"] + + # Try to improve link text if it looks like a filename + if env and _looks_like_filename(link_text): + better_text = _resolve_link_text(link_text, attrs, env) + if better_text and better_text != link_text: + link["text"] = better_text + link["original_text"] = link_text # Keep original for debugging + + # Only return if we have a URL or target_doc + if link.get("url") or link.get("target_doc"): + return link + return None + + +def _extract_download_reference(node: addnodes.download_reference) -> dict[str, Any] | None: + """Extract metadata from a download reference node.""" + link_text = node.astext().strip() + attrs = getattr(node, "attributes", {}) + + if not link_text: + return None + + link: dict[str, Any] = { + "text": link_text, + "type": "download", + } + + if "reftarget" in attrs: + link["url"] = attrs["reftarget"] + if "filename" in attrs: + link["filename"] = attrs["filename"] + + return link if link.get("url") else None + + +def _normalize_internal_url(url: str, current_docname: str) -> str: + """Normalize internal URLs to consistent format. + + Converts .md/.rst extensions to .html and resolves relative paths. 
+ """ + if not url: + return url + + # Already absolute or external + if url.startswith(("/", "http://", "https://", "#")): + # Just normalize extension for absolute internal paths + if url.startswith("/"): + return _normalize_extension(url) + return url + + # Relative URL - resolve against current document + if current_docname: + # Get directory of current document + if "/" in current_docname: + base_dir = current_docname.rsplit("/", 1)[0] + url = f"{base_dir}/{url}" + + return _normalize_extension(url) + + +def _normalize_extension(url: str) -> str: + """Normalize file extensions to .html.""" + # Split off anchor if present + anchor = "" + if "#" in url: + url, anchor = url.rsplit("#", 1) + anchor = f"#{anchor}" + + # Replace source extensions with .html + for ext in (".md", ".rst", ".txt"): + if url.endswith(ext): + url = url[: -len(ext)] + ".html" + break + + # Add .html if no extension + if url and not url.endswith(".html") and "." not in url.rsplit("/", 1)[-1]: + url = url + ".html" + + return url + anchor + + +def _looks_like_filename(text: str) -> bool: + """Check if text looks like a filename/docname rather than readable text.""" + if not text: + return False + + # Single word with no spaces, possibly with path separators + if " " not in text and ("/" in text or text == text.lower()): + # But not if it's a reasonable title-like word + if len(text) > 2 and text[0].isupper() and text[1:].islower(): + return False + return True + + # Contains path separators + if "/" in text or "\\" in text: + return True + + # Ends with file extension + if re.search(r"\.(md|rst|html|txt)$", text, re.IGNORECASE): + return True + + return False + + +def _resolve_link_text( + text: str, + attrs: dict[str, Any], + env: "BuildEnvironment", +) -> str: + """Try to resolve a filename-like link text to a proper title.""" + # Try to get the target document name + target_doc = attrs.get("refdoc") or attrs.get("reftarget", "") + + # Clean up the target + target_doc = 
def _humanize_filename(filename: str) -> str:
    """Convert a filename like "a/b/getting-started.md" into "Getting Started"."""
    # Keep only the last path segment.
    name = filename.rsplit("/", 1)[-1]

    # Strip a known extension, if present.
    for ext in (".md", ".rst", ".html", ".txt"):
        if name.endswith(ext):
            name = name[: -len(ext)]
            break

    # Separators become spaces, then title-case.
    return name.replace("-", " ").replace("_", " ").title()


def extract_images(doctree: nodes.document) -> list[dict[str, Any]]:
    """Collect standalone and figure-embedded images from the document tree."""
    return [*_extract_standalone_images(doctree), *_extract_figure_images(doctree)]


def _extract_standalone_images(doctree: nodes.document) -> list[dict[str, Any]]:
    """Extract standalone image nodes."""
    images: list[dict[str, Any]] = []
    for node in doctree.traverse(nodes.image):
        if hasattr(node, "attributes") and (info := _build_image_info(node.attributes)):
            images.append(info)
    return images


def _extract_figure_images(doctree: nodes.document) -> list[dict[str, Any]]:
    """Extract images nested inside figure nodes, attaching the figure caption."""
    images: list[dict[str, Any]] = []
    for figure in doctree.traverse(nodes.figure):
        # The caption is a property of the figure, shared by its images.
        caption = _extract_figure_caption(figure)
        for img_node in figure.traverse(nodes.image):
            if hasattr(img_node, "attributes") and (info := _build_image_info(img_node.attributes)):
                if caption:
                    info["caption"] = caption
                images.append(info)
    return images


def _build_image_info(attrs: dict[str, Any]) -> dict[str, Any] | None:
    """Build {src, alt, [title/width/height]} from image attributes; None without a uri."""
    image_src = attrs.get("uri", "")
    if not image_src:
        return None

    image_info: dict[str, Any] = {"src": image_src, "alt": attrs.get("alt", "")}
    for attr_name in ("title", "width", "height"):
        if attr_name in attrs:
            image_info[attr_name] = attrs[attr_name]
    return image_info
def _extract_figure_caption(figure_node: nodes.figure) -> str:
    """Return the first caption's text within a figure node, or ""."""
    for caption_node in figure_node.traverse(nodes.caption):
        return caption_node.astext().strip()
    return ""


logger = logging.getLogger(__name__)

# Text-processing limits.
MIN_SUBSTANTIAL_CONTENT_LENGTH = 50
MAX_SUMMARY_LENGTH = 300
MIN_KEYWORD_LENGTH = 3
MAX_KEYWORDS_RETURNED = 50


def extract_raw_markdown(env: BuildEnvironment, docname: str) -> str | None:
    """Read the raw markdown source for *docname*, with YAML frontmatter stripped.

    Returns None when the source cannot be located or read.
    """
    try:
        source_path = env.doc2path(docname)
        if not source_path or not source_path.exists():
            return None

        with open(source_path, encoding="utf-8") as handle:
            raw = handle.read()

        # Drop a leading "---" frontmatter block, if present.
        if raw.startswith("---"):
            closing = raw.find("\n---\n", 3)
            if closing != -1:
                raw = raw[closing + 5 :]  # Skip past the closing "---\n"

        return raw.strip()

    except Exception as exc:  # noqa: BLE001
        logger.debug(f"Could not extract raw markdown from {docname}: {exc}")
        return None


def extract_text_content(doctree: nodes.document) -> str:
    """Concatenate all Text nodes of the document tree into one plain string."""
    return " ".join(node.astext() for node in doctree.traverse(nodes.Text)).strip()
def extract_clean_text_content(doctree: nodes.document, env: BuildEnvironment | None = None) -> str:
    """Extract clean text content, filtering out navigation elements.

    Walks every node once. Reference nodes are emitted through
    ``_get_improved_link_text`` (so filename-like link text can be upgraded
    to real titles) and their inner Text nodes are skipped to avoid emitting
    the same link text twice.
    """
    parts: list[str] = []
    emitted_refs: set[int] = set()  # id()s of reference nodes already emitted

    for node in doctree.traverse():
        # Non-content node types.
        if isinstance(node, (nodes.target, nodes.substitution_definition)):
            continue
        # Toctree and other directive scaffolding.
        if hasattr(node, "tagname") and node.tagname in ["toctree", "index", "meta"]:
            continue

        # References get special treatment: emit (possibly improved) link text once.
        if isinstance(node, nodes.reference):
            ref_id = id(node)
            if ref_id not in emitted_refs:
                emitted_refs.add(ref_id)
                improved = _get_improved_link_text(node, env)
                if improved:
                    parts.append(improved)
            continue

        if isinstance(node, nodes.Text):
            # Skip Text nodes whose parent reference was already emitted.
            parent = node.parent
            if isinstance(parent, nodes.reference) and id(parent) in emitted_refs:
                continue
            fragment = node.astext().strip()
            if fragment and not fragment.startswith("¶"):  # Skip permalink symbols
                parts.append(fragment)

    # Collapse all whitespace runs into single spaces.
    return re.sub(r"\s+", " ", " ".join(parts)).strip()


def _get_improved_link_text(node: nodes.reference, env: BuildEnvironment | None) -> str:
    """Return link text, swapping filename-like text for the target's title when known."""
    text = node.astext().strip()
    if not text:
        return ""

    # Readable text is used as-is.
    if not _text_looks_like_filename(text):
        return text

    attrs = getattr(node, "attributes", {})

    # Prefer refdoc (cross-reference target); fall back to a cleaned reftarget.
    target_doc = attrs.get("refdoc", "")
    if not target_doc:
        target_doc = attrs.get("reftarget", "")
        target_doc = target_doc.replace(".html", "").replace(".md", "").replace(".rst", "")

    if target_doc and env and hasattr(env, "titles") and target_doc in env.titles:
        title_node = env.titles[target_doc]
        if title_node:
            resolved_title = title_node.astext().strip()
            if resolved_title:
                return resolved_title

    # Fallback: make the filename itself readable.
    return _humanize_link_text(text)
def _text_looks_like_filename(text: str) -> bool:
    """Heuristic: does link text look like a filename rather than readable text?"""
    if not text:
        return False

    # Path separators are a strong signal.
    if "/" in text or "\\" in text:
        return True

    # Known source/output extensions.
    if re.search(r"\.(md|rst|html|txt)$", text, re.IGNORECASE):
        return True

    # A single all-lowercase token (like "index", "readme", "configuration").
    return " " not in text and text == text.lower() and len(text) > 2


def _humanize_link_text(text: str) -> str:
    """Convert filename-like text ("a/getting-started.md") to readable form."""
    name = text.rsplit("/", 1)[-1]
    for ext in (".md", ".rst", ".html", ".txt"):
        if name.endswith(ext):
            name = name[: -len(ext)]
            break
    return name.replace("-", " ").replace("_", " ").title()


def clean_text_for_llm(text: str) -> str:
    """Clean text content to make it more suitable for LLM processing and search indexing."""
    if not text:
        return ""

    # Remove SVG content (common in documentation).
    # FIX: the original pattern had its angle-bracket markup stripped in
    # transit (it read r"]*>.*?"), leaving an expression that never matched
    # an SVG element; reconstructed per the comment's stated intent.
    text = re.sub(r"<svg[^>]*>.*?</svg>", "", text, flags=re.DOTALL | re.IGNORECASE)

    # Remove HTML comments (same reconstruction: original pattern was empty).
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)

    # Remove empty directive blocks (common MyST artifacts).
    text = re.sub(r"^\s*```\{[^}]+\}\s*```\s*$", "", text, flags=re.MULTILINE)

    # Remove toctree artifacts.
    text = re.sub(r"^\s*:caption:.*$", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*:hidden:\s*$", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*:glob:\s*$", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*:maxdepth:\s*\d+\s*$", "", text, flags=re.MULTILINE)

    # Remove MyST colon-fence directive markers.
    text = re.sub(r"^\s*:::\{[^}]+\}\s*$", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*:::\s*$", "", text, flags=re.MULTILINE)

    # Drop language hints on fenced code blocks.
    text = re.sub(r"```(\w+)\s*\n", "```\n", text)

    # Collapse whitespace but preserve paragraph breaks.
    text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text)  # Multiple line breaks -> double
    text = re.sub(r"[ \t]+", " ", text)  # Multiple spaces/tabs -> single space

    # Keep only lines that contain actual words, trimming stray punctuation.
    cleaned_lines: list[str] = []
    for line in text.split("\n"):
        stripped = line.strip()
        if stripped and re.search(r"[a-zA-Z0-9]", stripped):
            stripped = re.sub(r"^[^\w\s]+\s*", "", stripped)
            stripped = re.sub(r"\s*[^\w\s]+$", "", stripped)
            if stripped:
                cleaned_lines.append(stripped)

    return "\n".join(cleaned_lines).strip()
def extract_directive_content(directive_block: str) -> str:
    """Extract meaningful content from MyST directive blocks.

    Strips opening/closing fence lines (":::{...}", "```{...}", ":::", "```")
    and ":option: value" directive option lines, keeping only the body text.
    """
    if not directive_block:
        return ""

    content_lines: list[str] = []
    in_content = False

    for line in directive_block.split("\n"):
        stripped = line.strip()

        # Directive opener: ":::{note}" or "```{directive}".
        if stripped.startswith(":::") or stripped.startswith("```{"):
            in_content = True
            continue
        # Directive closer.
        if stripped in (":::", "```"):
            continue
        # Directive options such as ":class: tip" or ":maxdepth: 2".
        # FIX: the original guarded this with "and not in_content", which made
        # option-skipping unreachable once the opening fence set in_content —
        # i.e. for every option actually inside a directive — so option lines
        # leaked into the extracted content, contradicting the stated intent.
        if stripped.startswith(":") and (not in_content or re.match(r"^:[\w-]+:(\s|$)", stripped)):
            continue

        content_lines.append(line)

    return "\n".join(content_lines).strip()
def _truncate_summary(text: str) -> str:
    """Collapse whitespace and cap at MAX_SUMMARY_LENGTH (ellipsis included)."""
    text = re.sub(r"\s+", " ", text)
    if len(text) > MAX_SUMMARY_LENGTH:
        # FIX: derive the cut from MAX_SUMMARY_LENGTH instead of the magic
        # number 297 the original hard-coded in two separate places.
        text = text[: MAX_SUMMARY_LENGTH - 3] + "..."
    return text


def extract_summary(doctree: nodes.document) -> str:
    """Extract a summary from the document (first paragraph or section)."""
    # Prefer the first substantial paragraph.
    for node in doctree.traverse(nodes.paragraph):
        candidate = node.astext().strip()
        if candidate and len(candidate) > MIN_SUBSTANTIAL_CONTENT_LENGTH:
            return _truncate_summary(candidate)

    # Fallback: leading plain text of the whole document.
    fallback = extract_text_content(doctree)
    if fallback:
        return _truncate_summary(fallback)

    return ""


def extract_keywords(content: str, headings: list[dict[str, Any]]) -> list[str]:
    """Extract relevant keywords from content for search optimization."""
    if not content:
        return []

    keywords: set[str] = set()

    # Heading words.
    for heading in headings:
        if "text" in heading:
            keywords.update(re.findall(r"\b[a-zA-Z]{3,}\b", heading["text"].lower()))

    # Mixed-case technical identifiers (API, class and function names).
    tech_terms = re.findall(r"\b[A-Z][a-zA-Z0-9_]*[a-z][a-zA-Z0-9_]*\b", content)
    keywords.update(term.lower() for term in tech_terms)

    # Quoted/backticked short terms (often important concepts).
    for term in re.findall(r'["`]([^"`]{3,20})["`]', content):
        if re.match(r"^[a-zA-Z][a-zA-Z0-9_\-\s]*$", term):
            keywords.add(term.lower().strip())

    # Configuration-style "key:" / "key=" tokens.
    config_keys = re.findall(r"\b[a-z_]+[a-z0-9_]*\s*[:=]", content)
    keywords.update(key.rstrip(":=").strip() for key in config_keys)

    # File extensions.
    extensions = re.findall(r"\.[a-z]{2,4}\b", content.lower())
    keywords.update(ext.lstrip(".") for ext in extensions)

    # Drop stop words and too-short terms.
    stop_words = {
        "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
        "her", "was", "one", "our", "out", "day", "get", "has", "him", "his",
        "how", "its", "may", "new", "now", "old", "see", "two", "who", "boy",
        "did", "she", "use", "way", "what", "when", "will",
    }
    keywords = {kw for kw in keywords if len(kw) >= MIN_KEYWORD_LENGTH and kw not in stop_words}

    # Sorted for determinism, capped to a reasonable number.
    return sorted(keywords)[:MAX_KEYWORDS_RETURNED]
+ tech_terms = re.findall(r"\b[A-Z][a-zA-Z0-9_]*[a-z][a-zA-Z0-9_]*\b", content) + keywords.update(term.lower() for term in tech_terms) + + # Extract quoted terms (often important concepts) + quoted_terms = re.findall(r'["`]([^"`]{3,20})["`]', content) + for term in quoted_terms: + if re.match(r"^[a-zA-Z][a-zA-Z0-9_\-\s]*$", term): + keywords.add(term.lower().strip()) + + # Extract common patterns for documentation keywords + # Configuration keys, file extensions, command names + config_keys = re.findall(r"\b[a-z_]+[a-z0-9_]*\s*[:=]", content) + keywords.update(key.rstrip(":=").strip() for key in config_keys) + + # File extensions + extensions = re.findall(r"\.[a-z]{2,4}\b", content.lower()) + keywords.update(ext.lstrip(".") for ext in extensions) + + # Remove common stop words and very short terms + stop_words = { + "the", + "and", + "for", + "are", + "but", + "not", + "you", + "all", + "can", + "had", + "her", + "was", + "one", + "our", + "out", + "day", + "get", + "has", + "him", + "his", + "how", + "its", + "may", + "new", + "now", + "old", + "see", + "two", + "who", + "boy", + "did", + "she", + "use", + "way", + "what", + "when", + "will", + } + keywords = {kw for kw in keywords if len(kw) >= MIN_KEYWORD_LENGTH and kw not in stop_words} + + # Return sorted list, limited to reasonable number + return sorted(keywords)[:MAX_KEYWORDS_RETURNED] diff --git a/docs/_extensions/json_output/core/__init__.py b/docs/_extensions/json_output/core/__init__.py new file mode 100644 index 0000000000..cc699ee8cc --- /dev/null +++ b/docs/_extensions/json_output/core/__init__.py @@ -0,0 +1,17 @@ +"""Core JSON output generation components.""" + +from .builder import JSONOutputBuilder +from .document_discovery import DocumentDiscovery +from .global_metadata import get_global_metadata +from .hierarchy_builder import HierarchyBuilder +from .json_formatter import JSONFormatter +from .json_writer import JSONWriter + +__all__ = [ + "DocumentDiscovery", + "HierarchyBuilder", + 
class JSONOutputBuilder:
    """Handles JSON output generation for documents.

    Facade that wires together the cache, discovery, formatting and writing
    components and exposes a stable API for the processing pipeline.  Most
    methods simply delegate to the owned components; keep them thin so the
    components stay independently testable.
    """

    def __init__(self, app: Sphinx):
        # Keep direct handles on the Sphinx app/env/config; the sub-components
        # below receive `self` so they can call back into the cached
        # metadata/content extractors.
        self.app = app
        self.env = app.env
        self.config = app.config

        # Initialize cache manager
        self.cache = JSONOutputCache()

        # Initialize modular components
        self.document_discovery = DocumentDiscovery(app, self)
        self.json_formatter = JSONFormatter(app, self)
        self.json_writer = JSONWriter(app)
        self.hierarchy_builder = HierarchyBuilder(app, self, self.document_discovery, self.json_formatter)

    def should_generate_json(self, docname: str) -> bool:
        """Check if JSON should be generated for this document."""
        return should_generate_json(self.config, docname)

    def needs_update(self, docname: str) -> bool:
        """Check if document needs to be updated based on modification time.

        Returns True unconditionally when the `incremental_build` setting is
        disabled (the cache treats that as "always rebuild").
        """
        incremental_enabled = get_setting(self.config, "incremental_build", False)
        source_path = self.env.doc2path(docname)
        return self.cache.needs_update(docname, source_path, incremental_enabled)

    def mark_updated(self, docname: str) -> None:
        """Mark document as processed with current timestamp."""
        source_path = self.env.doc2path(docname)
        self.cache.mark_updated(docname, source_path)

    def extract_document_metadata(self, docname: str) -> dict[str, Any]:
        """Extract metadata from document with caching.

        The extraction runs under the shared cache lock so parallel workers
        do not race on the shared metadata/frontmatter caches.
        """
        return self.cache.with_cache_lock(
            _extract_document_metadata,
            self.env,
            docname,
            self.cache.get_metadata_cache(),
            self.cache.get_frontmatter_cache(),
        )

    def extract_document_content(self, docname: str) -> dict[str, Any]:
        """Extract content from document optimized for LLM/search use cases."""
        return self.cache.with_cache_lock(_extract_document_content, self.env, docname, self.cache.get_content_cache())

    def build_json_data(self, docname: str) -> dict[str, Any]:
        """Build optimized JSON data structure for LLM/search use cases."""
        # Use the JSON formatter for base data
        data = self.json_formatter.build_json_data(docname)

        # Add children for directory indexes using hierarchy builder
        self.hierarchy_builder.add_children_to_data(data, docname)

        return data

    def write_json_file(self, docname: str, data: dict[str, Any]) -> None:
        """Write JSON data to file."""
        self.json_writer.write_json_file(docname, data)

    # Delegate methods to maintain API compatibility
    def get_child_documents(self, parent_docname: str) -> list[str]:
        """Get all child documents for a parent directory."""
        return self.document_discovery.get_child_documents(parent_docname)

    def is_hidden_document(self, docname: str) -> bool:
        """Check if a document should be considered hidden."""
        return self.document_discovery.is_hidden_document(docname)

    def get_all_documents_recursive(self) -> list[str]:
        """Get all non-hidden documents recursively."""
        return self.document_discovery.get_all_documents_recursive()

    def build_child_json_data(self, docname: str, include_content: bool | None = None) -> dict[str, Any]:
        """Build optimized JSON data for child documents (LLM/search focused).

        ``include_content=None`` defers to the `include_child_content` setting.
        """
        return self.json_formatter.build_child_json_data(docname, include_content)
def get_all_documents_recursive(self) -> list[str]:
    """Return every non-hidden docname in the build, sorted."""
    return sorted(name for name in self.env.all_docs if not self.is_hidden_document(name))


def get_section_path(self, docname: str) -> list[str]:
    """Return a human-readable breadcrumb derived from the docname.

    Path components named ``index`` or ``README`` are dropped; the rest are
    title-cased with dashes/underscores turned into spaces.
    """
    return [
        part.replace("-", " ").replace("_", " ").title()
        for part in docname.split("/")
        if part not in ("index", "README")
    ]


def detect_document_type(self, docname: str, title: str, content: str) -> str:
    """Classify a document for search categorization.

    Checks run in priority order: name/title hints first, then ``/index``
    as overview, then lightweight content sniffing on the first 1000 chars.
    Falls back to the generic "documentation" type.
    """
    name = docname.lower()
    heading = title.lower()
    body = content.lower()[:1000]

    if "tutorial" in name or "tutorial" in heading:
        return "tutorial"
    if "guide" in name or "guide" in heading:
        return "guide"
    if "reference" in name or "api" in name:
        return "reference"
    if "example" in name or "examples" in name:
        return "example"
    if "troubleshoot" in name or "faq" in name:
        return "troubleshooting"
    if "install" in name or "setup" in name:
        return "installation"
    if docname.endswith("/index"):
        return "overview"
    # Shell/CLI snippets suggest a hands-on tutorial.
    if any(marker in body for marker in ("$ ", "pip install", "docker run", "git clone")):
        return "tutorial"
    # API-ish vocabulary suggests reference material.
    if any(marker in body for marker in ("class ", "def ", "function", "method", "parameter")):
        return "reference"
    return "documentation"
", "def ", "function", "method", "parameter"]), + ), + ] + + # Check each type in order and return the first match + for doc_type, check_func in type_checks: + if check_func(): + return doc_type + + return "documentation" diff --git a/docs/_extensions/json_output/core/global_metadata.py b/docs/_extensions/json_output/core/global_metadata.py new file mode 100644 index 0000000000..9644ea3dd3 --- /dev/null +++ b/docs/_extensions/json_output/core/global_metadata.py @@ -0,0 +1,147 @@ +"""Global metadata extraction from Sphinx configuration. + +This module provides functions to extract and build global metadata +from conf.py settings for inclusion in JSON output files. +""" + +import re +from typing import Any + +from sphinx.config import Config +from sphinx.util import logging + +logger = logging.getLogger(__name__) + + +def get_global_metadata(config: Config) -> dict[str, Any]: + """Build global metadata from Sphinx config settings. + + Combines explicit global_metadata settings with auto-inferred values + from standard Sphinx configuration (project, release, etc.). 
+ + Args: + config: Sphinx configuration object + + Returns: + Dictionary with global metadata (book, product, site sections) + """ + settings = getattr(config, "json_output_settings", {}) + + # Start with explicit global_metadata if provided + global_meta = _deep_copy_dict(settings.get("global_metadata", {})) + + # Auto-infer if enabled + if settings.get("infer_global_metadata", True): + _infer_book_metadata(global_meta, config) + _infer_product_metadata(global_meta, config) + _infer_site_metadata(global_meta, config) + + # Remove empty sections + return {k: v for k, v in global_meta.items() if v} + + +def _deep_copy_dict(d: dict) -> dict: + """Create a deep copy of a nested dictionary.""" + result = {} + for k, v in d.items(): + if isinstance(v, dict): + result[k] = _deep_copy_dict(v) + elif isinstance(v, list): + result[k] = list(v) + else: + result[k] = v + return result + + +def _infer_book_metadata(global_meta: dict, config: Config) -> None: + """Infer book metadata from Sphinx config.""" + global_meta.setdefault("book", {}) + book = global_meta["book"] + + # book.title from project + if "title" not in book and hasattr(config, "project"): + book["title"] = config.project + + # book.version from release + if "version" not in book and hasattr(config, "release"): + book["version"] = config.release + + +def _infer_product_metadata(global_meta: dict, config: Config) -> None: + """Infer product metadata from Sphinx config.""" + global_meta.setdefault("product", {}) + product = global_meta["product"] + + # Try to get from html_context first (explicit config) + html_context = getattr(config, "html_context", {}) + + # product.name + if "name" not in product: + if html_context.get("product_name"): + product["name"] = html_context["product_name"] + elif hasattr(config, "project"): + product["name"] = _extract_product_name(config.project) + + # product.family + if "family" not in product and html_context.get("product_family"): + family = html_context["product_family"] + 
product["family"] = family if isinstance(family, list) else [family] + + # product.version (can differ from book.version) + if "version" not in product and hasattr(config, "release"): + product["version"] = config.release + + +def _infer_site_metadata(global_meta: dict, config: Config) -> None: + """Infer site metadata from Sphinx config.""" + html_context = getattr(config, "html_context", {}) + + # Only add site section if we have data + site_name = html_context.get("site_name") + if site_name: + global_meta.setdefault("site", {}) + if "name" not in global_meta["site"]: + global_meta["site"]["name"] = site_name + + +def _extract_product_name(project: str) -> str: + """Extract product name from project string. + + Examples: + 'NVIDIA DORI' -> 'DORI' + 'NVIDIA NeMo Curator User Guide' -> 'NeMo Curator' + 'NeMo Framework Documentation' -> 'NeMo Framework' + + Args: + project: The Sphinx project name + + Returns: + Extracted product name + """ + name = project + + # Remove NVIDIA prefix + name = re.sub(r"^NVIDIA\s+", "", name, flags=re.IGNORECASE) + + # Remove common documentation suffixes + suffixes = [ + r"\s+User Guide$", + r"\s+User Manual$", + r"\s+Developer Guide$", + r"\s+Documentation$", + r"\s+Reference$", + r"\s+Reference Guide$", + r"\s+API Reference$", + r"\s+Docs$", + ] + for suffix in suffixes: + name = re.sub(suffix, "", name, flags=re.IGNORECASE) + + return name.strip() + + + + + + + diff --git a/docs/_extensions/json_output/core/hierarchy_builder.py b/docs/_extensions/json_output/core/hierarchy_builder.py new file mode 100644 index 0000000000..d3604b99a9 --- /dev/null +++ b/docs/_extensions/json_output/core/hierarchy_builder.py @@ -0,0 +1,125 @@ +"""Hierarchy building for complex document structures like main index.""" + +from typing import TYPE_CHECKING, Any + +from sphinx.application import Sphinx +from sphinx.util import logging + +from ..utils import get_setting + +if TYPE_CHECKING: + from .builder import JSONOutputBuilder + from 
class HierarchyBuilder:
    """Handles complex hierarchy building for indexes.

    For the site root (``index``) it emits a flat document array (full or
    metadata-only, per ``main_index_mode``); for directory indexes it emits
    direct children under ``children``.
    """

    def __init__(
        self,
        app: Sphinx,
        json_builder: "JSONOutputBuilder",
        document_discovery: "DocumentDiscovery",
        json_formatter: "JSONFormatter",
    ):
        self.app = app
        self.config = app.config
        self.json_builder = json_builder
        self.document_discovery = document_discovery
        self.json_formatter = json_formatter

    def add_children_to_data(self, data: dict[str, Any], docname: str) -> None:
        """Add children documents to data structure for directory indexes.

        No-op unless ``include_children`` is enabled and ``docname`` is an
        index page.
        """
        include_children = get_setting(self.config, "include_children", True)
        if not include_children or not (docname == "index" or docname.endswith("/index")):
            return

        if docname == "index":
            self._handle_main_index(data, docname)
        else:
            self._handle_directory_index(data, docname)

    def _handle_main_index(self, data: dict[str, Any], docname: str) -> None:
        """Handle main index behavior: optimized for search index generation."""
        main_index_mode = get_setting(self.config, "main_index_mode", "full")
        max_main_index_docs = get_setting(self.config, "max_main_index_docs", 1000)

        if main_index_mode == "disabled":
            logger.info("Main index children disabled by configuration")
            data["children"] = []
            data["total_documents"] = 0
        elif main_index_mode == "metadata_only":
            self._build_search_index(data, docname, max_main_index_docs, include_content=False)
        else:  # 'full' mode - comprehensive search index
            self._build_search_index(data, docname, max_main_index_docs, include_content=True)

    # The two methods below are kept as thin wrappers for API compatibility;
    # their bodies were near-identical copies and are now shared.
    def _build_metadata_only_index(self, data: dict[str, Any], docname: str, max_docs: int) -> None:
        """Build metadata-only search index for main index page."""
        self._build_search_index(data, docname, max_docs, include_content=False)

    def _build_full_search_index(self, data: dict[str, Any], docname: str, max_docs: int) -> None:
        """Build comprehensive search index for main index page."""
        self._build_search_index(data, docname, max_docs, include_content=True)

    def _build_search_index(self, data: dict[str, Any], docname: str, max_docs: int, *, include_content: bool) -> None:
        """Shared implementation for the main-index search array.

        Deduplicates the former copy-pasted builders and calls the potentially
        expensive recursive document scan once instead of up to three times.
        """
        label = "comprehensive" if include_content else "metadata-only"
        logger.info(f"Building {label} search index for main index page...")

        all_docs = self.document_discovery.get_all_documents_recursive()
        total_documents = len(all_docs)

        # Apply document limit if set (0 = no limit)
        if max_docs > 0 and total_documents > max_docs:
            all_docs = all_docs[:max_docs]
            logger.info(f"Limited to {max_docs} documents (set max_main_index_docs to 0 for no limit)")

        # In full mode the formatter must resolve include_content from its own
        # `include_child_content` setting, so only pass the kwarg when forcing
        # metadata-only output (preserves the original call signatures).
        kwargs = {} if include_content else {"include_content": False}

        # Build flat array of documents for search index
        documents = []
        for child_docname in all_docs:
            if child_docname == docname:  # Don't include self
                continue
            try:
                documents.append(self.json_formatter.build_child_json_data(child_docname, **kwargs))
            except Exception as e:  # noqa: BLE001
                logger.warning(f"Failed to build child data for {child_docname}: {e}")

        # Store as flat array - will be output as array at root level
        data["_documents_array"] = documents
        data["total_documents"] = total_documents

        logger.info(f"Generated {label} search index with {len(documents)} documents")

    def _handle_directory_index(self, data: dict[str, Any], docname: str) -> None:
        """Handle directory index: gets direct children."""
        children = self.document_discovery.get_child_documents(docname)
        data["children"] = []

        for child_docname in children:
            try:
                child_data = self.json_formatter.build_child_json_data(child_docname)
                data["children"].append(child_data)
            except Exception as e:  # noqa: BLE001, PERF203
                logger.warning(f"Failed to build child data for {child_docname}: {e}")

        logger.debug(f"Included {len(data['children'])} child documents for {docname}")
def add_metadata_fields(self, data: dict[str, Any], metadata: dict[str, Any]) -> None:
    """Add all metadata fields to JSON data structure.

    Supports both new nested schema and legacy flat fields for backwards compatibility.
    New schema: topics, tags, industry, content.type, content.learning_level,
    content.audience, facets.modality
    Legacy schema: categories, personas, difficulty, content_type, modality
    """
    # Basic metadata fields
    if metadata.get("description"):
        data["description"] = metadata["description"]

    # Tags (same in both schemas); always normalized to a list
    if metadata.get("tags"):
        data["tags"] = metadata["tags"] if isinstance(metadata["tags"], list) else [metadata["tags"]]

    # Topics (new schema) or categories (legacy)
    topics = metadata.get("topics") or metadata.get("categories")
    if topics:
        data["topics"] = topics if isinstance(topics, list) else [topics]

    # Industry verticals
    if metadata.get("industry"):
        industry = metadata["industry"]
        data["industry"] = industry if isinstance(industry, list) else [industry]

    if metadata.get("author"):
        data["author"] = metadata["author"]

    # Content classification - support nested and flat structures.
    # `nested` is the content mapping when it is a dict, else an empty dict,
    # so the lookup chains below need no per-line isinstance guards.
    content = metadata.get("content", {})
    nested = content if isinstance(content, dict) else {}

    # Content type: content.type (new) or content_type (legacy)
    content_type = nested.get("type") or metadata.get("content_type")
    if content_type:
        data["content_type"] = content_type

    # Learning level: content.learning_level (new), content.difficulty,
    # then flat learning_level/difficulty (legacy).
    # Fix: the original chained `x or y if cond else None` conditional
    # expressions relied on precedence — `(x or y) if cond else None` — and
    # discarded the accumulated value whenever `content` was not a dict.
    learning_level = (
        nested.get("learning_level")
        or nested.get("difficulty")
        or metadata.get("learning_level")
        or metadata.get("difficulty")
    )
    if learning_level:
        data["learning_level"] = learning_level

    # Audience: content.audience (new) or personas (legacy)
    audience = nested.get("audience") or metadata.get("personas")
    if audience:
        data["audience"] = audience if isinstance(audience, list) else [audience]

    # Keywords from frontmatter (takes priority over auto-extraction)
    if metadata.get("keywords"):
        keywords = metadata["keywords"]
        data["keywords"] = keywords if isinstance(keywords, list) else [keywords]

    # Product-specific facets - dynamically extract all facet keys
    facets = metadata.get("facets", {})
    if isinstance(facets, dict) and facets:
        # Include all facets as a nested object
        data["facets"] = facets
        # Also flatten facets to top level for backwards compatibility and easier filtering
        for facet_key, facet_value in facets.items():
            data[facet_key] = facet_value

    # Legacy flat modality support (if not already set via facets)
    if "modality" not in data and metadata.get("modality"):
        data["modality"] = metadata["modality"]

    # Content gating
    if metadata.get("only"):
        data["only"] = metadata["only"]
def build_json_data(self, docname: str) -> dict[str, Any]:
    """Build optimized JSON data structure for LLM/search use cases.

    Returns identity fields (id/title/url/last_modified), global conf.py
    metadata, frontmatter metadata and extracted content for one document.
    """
    # env.titles maps docname -> title node.  The original combined
    # dict.get(docname, nodes.title()) with a membership guard, which made
    # the default dead code — a direct indexed lookup is equivalent.
    title = self.env.titles[docname].astext() if docname in self.env.titles else ""

    # Extract metadata and content
    metadata = self.json_builder.extract_document_metadata(docname)
    content_data = self.json_builder.extract_document_content(docname)

    # Build data structure
    data = {
        "id": docname,
        "title": title,
        "url": get_document_url(self.app, docname),
        # NOTE(review): timestamp of JSON generation, not of source edit —
        # confirm that is the intended meaning of "last_modified".
        "last_modified": datetime.now(timezone.utc).isoformat(),
    }

    # Add global metadata from conf.py (book, product, site)
    self._add_global_metadata(data)

    # Add metadata fields from frontmatter
    self.add_metadata_fields(data, metadata)

    # Add content
    if content_data.get("content"):
        data["content"] = content_data["content"]
        data["format"] = content_data.get("format", "text")

    if content_data.get("summary"):
        data["summary"] = content_data["summary"]

    if content_data.get("headings"):
        data["headings"] = [{"text": h["text"], "level": h["level"]} for h in content_data["headings"]]

    return data


def _add_global_metadata(self, data: dict[str, Any]) -> None:
    """Inject global site/book/product metadata from conf.py."""
    for key, value in self.global_metadata.items():
        if value:  # Only add non-empty values
            data[key] = value


def _add_content_fields(
    self, data: dict[str, Any], content_data: dict[str, Any], docname: str, title: str
) -> None:
    """Add content-related fields to JSON data."""
    self._add_primary_content(data, content_data)
    self._add_summary_content(data, content_data)
    self._add_headings_content(data, content_data)
    self._add_optional_features(data, content_data)
    self._add_document_metadata(data, content_data, docname, title)


def _add_primary_content(self, data: dict[str, Any], content_data: dict[str, Any]) -> None:
    """Add primary content with length limits.

    `content_length`/`word_count` always describe the ORIGINAL content, even
    when the emitted `content` field is truncated.  Note the truncated text
    can exceed the limit by the 3-char ellipsis marker.
    """
    if not content_data.get("content"):
        return

    content_max_length = get_setting(self.config, "content_max_length", 50000)
    content = content_data["content"]

    if content_max_length > 0 and len(content) > content_max_length:
        content = content[:content_max_length] + "..."

    data["content"] = content
    data["format"] = content_data.get("format", "text")
    data["content_length"] = len(content_data["content"])  # Original length
    data["word_count"] = len(content_data["content"].split()) if content_data["content"] else 0


def _add_summary_content(self, data: dict[str, Any], content_data: dict[str, Any]) -> None:
    """Add summary with length limits (0 = unlimited)."""
    if not content_data.get("summary"):
        return

    summary_max_length = get_setting(self.config, "summary_max_length", 500)
    summary = content_data["summary"]

    if summary_max_length > 0 and len(summary) > summary_max_length:
        summary = summary[:summary_max_length] + "..."

    data["summary"] = summary


def _add_headings_content(self, data: dict[str, Any], content_data: dict[str, Any]) -> None:
    """Add headings for structure/navigation."""
    if not content_data.get("headings"):
        return

    # Simplify headings for LLM use
    data["headings"] = [
        {"text": h["text"], "level": h["level"], "id": h.get("id", "")} for h in content_data["headings"]
    ]
    # Add searchable heading text
    data["headings_text"] = " ".join([h["text"] for h in content_data["headings"]])


def _add_optional_features(self, data: dict[str, Any], content_data: dict[str, Any]) -> None:
    """Add optional search enhancement features (keywords, code, links, images)."""
    # Keywords: frontmatter takes priority, then auto-extraction
    if "keywords" not in data:  # Not already set from frontmatter
        if get_setting(self.config, "extract_keywords", True) and "keywords" in content_data:
            keywords_max_count = get_setting(self.config, "keywords_max_count", 50)
            keywords = (
                content_data["keywords"][:keywords_max_count] if keywords_max_count > 0 else content_data["keywords"]
            )
            data["keywords"] = keywords

    if get_setting(self.config, "extract_code_blocks", True) and "code_blocks" in content_data:
        data["code_blocks"] = content_data["code_blocks"]

    if get_setting(self.config, "extract_links", True) and "links" in content_data:
        data["links"] = content_data["links"]

    if get_setting(self.config, "extract_images", True) and "images" in content_data:
        data["images"] = content_data["images"]
class JSONWriter:
    """Writes generated JSON payloads into the Sphinx output directory."""

    def __init__(self, app: Sphinx):
        self.app = app
        self.config = app.config

    def write_json_file(self, docname: str, data: dict[str, Any]) -> None:
        """Serialize ``data`` for ``docname``; best-effort (logs, never raises)."""
        try:
            json_path = self._resolve_output_path(docname)
            json_path.parent.mkdir(parents=True, exist_ok=True)

            # For main index.json, output as array of page objects
            if docname == "index" and "_documents_array" in data:
                self._write_array_index(json_path, data)
            # Handle separate content files option
            elif get_setting(self.config, "separate_content", False) and "content" in data:
                self._write_separate_content(json_path, data)
            else:
                self._write_single_file(json_path, data)

            logger.debug(f"Generated JSON: {json_path}")

        except Exception:
            logger.exception(f"Failed to write JSON for {docname}")

    def _resolve_output_path(self, docname: str) -> Path:
        """Map a docname to its output path (directory indexes -> dir/index.json)."""
        outdir = Path(self.app.outdir)
        if docname == "index":
            return outdir / "index.json"
        if docname.endswith("/index"):
            return outdir / docname[: -len("/index")] / "index.json"
        return outdir / f"{docname}.json"

    def _write_array_index(self, json_path: Path, data: dict[str, Any]) -> None:
        """Write main index.json as an array of page objects for search engines."""
        documents = data.get("_documents_array", [])
        self._write_json_data(json_path, documents)
        logger.info(f"Generated search index array with {len(documents)} documents")

    def _write_separate_content(self, json_path: Path, data: dict[str, Any]) -> None:
        """Write content to separate file when separate_content is enabled."""
        # e.g. "page.json" -> "page.content.json"
        content_path = json_path.with_suffix(".content.json")
        self._write_json_data(
            content_path,
            {
                "id": data["id"],
                "content": data["content"],
                "format": data.get("format", "text"),
                "content_length": data.get("content_length", 0),
                "word_count": data.get("word_count", 0),
            },
        )

        # Main file keeps everything except the content, plus a reference
        # to the sibling content file.
        main_data = {k: v for k, v in data.items() if k != "content"}
        main_data["content_file"] = str(content_path.name)
        self._write_json_data(json_path, main_data)

    def _write_single_file(self, json_path: Path, data: dict[str, Any]) -> None:
        """Write all data to a single JSON file."""
        self._write_json_data(json_path, data)

    def _write_json_data(self, file_path: Path, data: dict[str, Any]) -> None:
        """Write JSON data to file, minified or pretty-printed per settings."""
        minify = get_setting(self.config, "minify_json", False)
        with open(file_path, "w", encoding="utf-8") as f:
            if minify:
                json.dump(data, f, ensure_ascii=False, separators=(",", ":"))
            else:
                json.dump(data, f, ensure_ascii=False, indent=2)
class JSONOutputCache:
    """Manages caching and incremental builds for JSON output.

    All instances bind to the same class-level caches so parallel workers
    share state; access to the shared maps is guarded by a class-level lock.
    """

    # Class-level shared caches with thread safety
    _shared_cache_lock = Lock()
    _shared_metadata_cache: ClassVar[dict[str, Any]] = {}
    _shared_frontmatter_cache: ClassVar[dict[str, Any]] = {}
    _shared_content_cache: ClassVar[dict[str, Any]] = {}
    _file_timestamps: ClassVar[dict[str, float]] = {}  # Track file modification times

    def __init__(self):
        """Bind this instance to the shared class-level caches."""
        with self._shared_cache_lock:
            self._metadata_cache = self._shared_metadata_cache
            self._frontmatter_cache = self._shared_frontmatter_cache
            self._content_cache = self._shared_content_cache
            self._timestamps = self._file_timestamps

    def get_metadata_cache(self) -> dict[str, Any]:
        """Return the shared metadata cache."""
        return self._metadata_cache

    def get_frontmatter_cache(self) -> dict[str, Any]:
        """Return the shared frontmatter cache."""
        return self._frontmatter_cache

    def get_content_cache(self) -> dict[str, Any]:
        """Return the shared content cache."""
        return self._content_cache

    def needs_update(self, docname: str, source_path: Path, incremental_enabled: bool = False) -> bool:
        """Return True when the document should be (re)processed.

        Always True when incremental builds are disabled or the source
        cannot be inspected; otherwise compares against the recorded mtime.
        """
        if not incremental_enabled:
            return True  # Process all files if incremental build is disabled

        try:
            if not source_path or not source_path.exists():
                return True

            current_mtime = source_path.stat().st_mtime
            recorded = self._timestamps.get(docname)
            if recorded is None:
                # NOTE(review): the first sighting records the mtime here,
                # *before* processing; if the build then fails the file is not
                # revisited on the next incremental run — confirm intended.
                self._timestamps[docname] = current_mtime
                return True
            return current_mtime > recorded
        except Exception as e:  # noqa: BLE001
            logger.debug(f"Error checking modification time for {docname}: {e}")
            return True  # Process if we can't determine modification time

    def mark_updated(self, docname: str, source_path: Path) -> None:
        """Record the current source mtime for ``docname`` (best-effort)."""
        try:
            if source_path and source_path.exists():
                self._timestamps[docname] = source_path.stat().st_mtime
        except Exception:  # noqa: BLE001
            logger.debug(f"Could not update timestamp for {docname}")

    def clear_caches(self) -> None:
        """Clear all caches (useful for testing or memory cleanup)."""
        with self._shared_cache_lock:
            for shared in (self._metadata_cache, self._frontmatter_cache, self._content_cache, self._timestamps):
                shared.clear()

    def get_cache_stats(self) -> dict[str, int]:
        """Return per-cache entry counts for debugging."""
        return {
            "metadata_cache_size": len(self._metadata_cache),
            "frontmatter_cache_size": len(self._frontmatter_cache),
            "content_cache_size": len(self._content_cache),
            "timestamps_size": len(self._timestamps),
        }

    def with_cache_lock(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
        """Execute ``func`` with the shared cache lock held."""
        with self._shared_cache_lock:
            return func(*args, **kwargs)
Config +from sphinx.util import logging + +from ..core.builder import JSONOutputBuilder +from ..utils import get_setting, validate_content_gating_integration + +logger = logging.getLogger(__name__) + + +def on_build_finished(app: Sphinx, exception: Exception) -> None: + """Generate JSON files after HTML build is complete.""" + if exception is not None: + return + + verbose = get_setting(app.config, "verbose", False) + log_func = logger.info if verbose else logger.debug + log_func("Generating JSON output files...") + + # Setup and validation + json_builder = _setup_json_builder(app) + if not json_builder: + return + + # Get and filter documents + all_docs = _filter_documents(app, json_builder, log_func) + + # Process documents + generated_count, failed_count = _process_documents(app, json_builder, all_docs, log_func) + + # Final logging + _log_results(log_func, generated_count, failed_count) + + +def _setup_json_builder(app: Sphinx) -> JSONOutputBuilder | None: + """Setup and validate JSON builder.""" + validate_content_gating_integration(app) + + try: + return JSONOutputBuilder(app) + except Exception: + logger.exception("Failed to initialize JSONOutputBuilder") + return None + + +def _filter_documents(app: Sphinx, json_builder: JSONOutputBuilder, log_func: Callable[[str], None]) -> list[str]: + """Filter documents based on gating, incremental build, and size limits.""" + all_docs, gated_docs = _get_initial_documents(app, json_builder) + + if gated_docs: + log_func(f"Content gating: excluding {len(gated_docs)} documents from JSON generation") + verbose = get_setting(app.config, "verbose", False) + if verbose and gated_docs: + logger.debug(f"Gated documents: {', '.join(sorted(gated_docs))}") + + all_docs = _apply_incremental_filtering(app, json_builder, all_docs, log_func) + return _apply_size_filtering(app, all_docs, log_func) + + +def _get_initial_documents(app: Sphinx, json_builder: JSONOutputBuilder) -> tuple[list[str], list[str]]: + """Get initial document 
lists, separating processable from gated documents.""" + all_docs = [] + gated_docs = [] + + for docname in app.env.all_docs: + if json_builder.should_generate_json(docname): + all_docs.append(docname) + else: + gated_docs.append(docname) + + return all_docs, gated_docs + + +def _apply_incremental_filtering( + app: Sphinx, json_builder: JSONOutputBuilder, all_docs: list[str], log_func: Callable[[str], None] +) -> list[str]: + """Apply incremental build filtering if enabled.""" + if not get_setting(app.config, "incremental_build", False): + return all_docs + + incremental_docs = [docname for docname in all_docs if json_builder.needs_update(docname)] + skipped_count = len(all_docs) - len(incremental_docs) + if skipped_count > 0: + log_func(f"Incremental build: skipping {skipped_count} unchanged files") + return incremental_docs + + +def _apply_size_filtering(app: Sphinx, all_docs: list[str], log_func: Callable[[str], None]) -> list[str]: + """Apply file size filtering if enabled.""" + skip_large_files = get_setting(app.config, "skip_large_files", 0) + if skip_large_files <= 0: + return all_docs + + filtered_docs = [] + for docname in all_docs: + try: + source_path = app.env.doc2path(docname) + if source_path and source_path.stat().st_size <= skip_large_files: + filtered_docs.append(docname) + else: + log_func(f"Skipping large file: {docname} ({source_path.stat().st_size} bytes)") + except Exception: # noqa: BLE001, PERF203 + filtered_docs.append(docname) # Include if we can't check size + return filtered_docs + + +def _process_documents( + app: Sphinx, json_builder: JSONOutputBuilder, all_docs: list[str], log_func: Callable[[str], None] +) -> tuple[int, int]: + """Process documents either in parallel or sequentially.""" + if get_setting(app.config, "parallel", False): + return process_documents_parallel(json_builder, all_docs, app.config, log_func) + else: + return process_documents_sequential(json_builder, all_docs) + + +def _log_results(log_func: Callable[[str], 
None], generated_count: int, failed_count: int) -> None: + """Log final processing results.""" + log_func(f"Generated {generated_count} JSON files") + if failed_count > 0: + logger.warning(f"Failed to generate {failed_count} JSON files") + + +def process_documents_parallel( + json_builder: JSONOutputBuilder, all_docs: list[str], config: Config, log_func: Callable[[str], None] +) -> tuple[int, int]: + """Process documents in parallel batches.""" + parallel_workers = get_setting(config, "parallel_workers", "auto") + if parallel_workers == "auto": + cpu_count = multiprocessing.cpu_count() or 1 + max_workers = min(cpu_count, 8) # Limit to 8 threads max + else: + max_workers = min(int(parallel_workers), 16) # Cap at 16 for safety + + batch_size = get_setting(config, "batch_size", 50) + + generated_count = 0 + failed_count = 0 + + # Process in batches to control memory usage + for i in range(0, len(all_docs), batch_size): + batch_docs = all_docs[i : i + batch_size] + log_func( + f"Processing batch {i // batch_size + 1}/{(len(all_docs) - 1) // batch_size + 1} ({len(batch_docs)} docs)" + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {} + for docname in batch_docs: + future = executor.submit(process_document, json_builder, docname) + futures[future] = docname + + for future, docname in futures.items(): + try: + if future.result(): + generated_count += 1 + else: + failed_count += 1 + except Exception: # noqa: PERF203 + logger.exception(f"Error generating JSON for {docname}") + failed_count += 1 + + return generated_count, failed_count + + +def process_documents_sequential(json_builder: JSONOutputBuilder, all_docs: list[str]) -> tuple[int, int]: + """Process documents sequentially.""" + generated_count = 0 + failed_count = 0 + + for docname in all_docs: + try: + json_data = json_builder.build_json_data(docname) + json_builder.write_json_file(docname, json_data) + generated_count += 1 + except Exception: # noqa: PERF203 + 
logger.exception(f"Error generating JSON for {docname}") + failed_count += 1 + + return generated_count, failed_count + + +def process_document(json_builder: JSONOutputBuilder, docname: str) -> bool: + """Process a single document for parallel execution.""" + try: + json_data = json_builder.build_json_data(docname) + json_builder.write_json_file(docname, json_data) + json_builder.mark_updated(docname) # Mark as processed for incremental builds + except Exception: + logger.exception(f"Error generating JSON for {docname}") + return False + else: + return True diff --git a/docs/_extensions/json_output/utils.py b/docs/_extensions/json_output/utils.py new file mode 100644 index 0000000000..17ee6ee5ae --- /dev/null +++ b/docs/_extensions/json_output/utils.py @@ -0,0 +1,122 @@ +"""Utility functions for JSON output.""" + +import fnmatch +from typing import Any + +from sphinx.application import Sphinx +from sphinx.config import Config +from sphinx.util import logging + +logger = logging.getLogger(__name__) + + +def validate_content_gating_integration(app: Sphinx) -> None: + """Validate that content gating integration is working properly.""" + # Check if content_gating extension is loaded + if "content_gating" in app.extensions: + logger.info("Content gating extension detected - JSON output will respect content gating rules") + else: + logger.debug("Content gating extension not detected - JSON output will process all documents") + + # Log current exclude patterns for debugging + exclude_patterns = getattr(app.config, "exclude_patterns", []) + if exclude_patterns: + logger.debug(f"Current exclude patterns: {exclude_patterns}") + + # Check current build tags for debugging + if hasattr(app, "tags"): + try: + current_tags = set(app.tags) + if current_tags: + logger.info(f"Active build tags: {current_tags}") + else: + logger.info("No build tags active") + except (TypeError, AttributeError): + logger.debug("Could not determine active build tags") + + +def get_setting(config: 
Config, key: str, default: Any = None) -> Any: # noqa: ANN401 + """Get a setting from json_output_settings with fallback to old config names.""" + settings = getattr(config, "json_output_settings", {}) + + # Try new settings format first + if key in settings: + return settings[key] + + # Fallback to old config names for backward compatibility + old_config_map = { + "enabled": "json_output_enabled", + "exclude_patterns": "json_output_exclude_patterns", + "verbose": "json_output_verbose", + "parallel": "json_output_parallel", + "include_children": "json_output_include_children", + "include_child_content": "json_output_include_child_content", + "main_index_mode": "json_output_main_index_mode", + "max_main_index_docs": "json_output_max_main_index_docs", + } + + old_key = old_config_map.get(key) + if old_key and hasattr(config, old_key): + return getattr(config, old_key) + + return default + + +def is_content_gated(config: Config, docname: str) -> bool: + """ + Check if a document is content gated by checking Sphinx's exclude_patterns. + This works with the content_gating extension that adds restricted documents + to exclude_patterns during config-inited event. 
+ """ + sphinx_exclude_patterns = getattr(config, "exclude_patterns", []) + if not sphinx_exclude_patterns: + return False + + # Convert docname to potential file paths that might be in exclude_patterns + possible_paths = [docname + ".md", docname + ".rst", docname] + + for possible_path in possible_paths: + # Check if this path matches any exclude pattern using fnmatch (supports glob patterns) + for pattern in sphinx_exclude_patterns: + if isinstance(pattern, str) and fnmatch.fnmatch(possible_path, pattern): + logger.debug(f"Document {docname} is content gated (matches pattern: {pattern})") + return True + + return False + + +def should_generate_json(config: Config, docname: str) -> bool: + """Check if JSON should be generated for this document.""" + if not get_setting(config, "enabled", True): + return False + + if not docname or not isinstance(docname, str): + logger.warning(f"Invalid docname for JSON generation: {docname}") + return False + + # CRITICAL: Check content gating first - if document is content gated, don't generate JSON + if is_content_gated(config, docname): + logger.info(f"Excluding {docname} from JSON generation due to content gating") + return False + + # Check JSON output extension's own exclude patterns + for pattern in get_setting(config, "exclude_patterns", []): + if isinstance(pattern, str) and docname.startswith(pattern): + return False + + return True + + +def get_document_url(app: Sphinx, docname: str) -> str: + """Get the URL for a document.""" + if not docname or not isinstance(docname, str): + logger.warning(f"Invalid docname for URL generation: {docname}") + return "invalid.html" + + try: + if hasattr(app.builder, "get_target_uri"): + return app.builder.get_target_uri(docname) + except Exception as e: # noqa: BLE001 + logger.warning(f"Failed to get target URI for {docname}: {e}") + + return docname + ".html" diff --git a/docs/conf.py b/docs/conf.py index baa058c085..9efa8a014a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,10 
+15,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import sys from datetime import date +from pathlib import Path from toml import load +# Add local extensions to path +sys.path.insert(0, str(Path(__file__).parent / "_extensions")) + project = "NVIDIA NeMo Guardrails Library Developer Guide" this_year = date.today().year copyright = f"2023-{this_year}, NVIDIA Corporation" @@ -35,6 +40,7 @@ "sphinx_reredirects", "sphinx_design", "sphinxcontrib.mermaid", + "json_output", ] redirects = { @@ -48,6 +54,8 @@ exclude_patterns = [ "README.md", + "_build/**", + "_extensions/**", ] myst_linkify_fuzzy_links = False @@ -65,10 +73,6 @@ "version": release, } -exclude_patterns = [ - "_build/**", -] - myst_url_schemes = { "http": None, "https": None, @@ -118,3 +122,9 @@ } html_baseurl = "https://docs.nvidia.com/nemo/guardrails/latest/" + +# JSON output extension settings +json_output_settings = { + "enabled": True, + "verbose": True, +} From 94ebad20ad497dea0294c7f45e49f724f9d91c20 Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Fri, 16 Jan 2026 11:59:00 -0800 Subject: [PATCH 2/6] Update docs/_extensions/json_output/README.md Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Signed-off-by: Miyoung Choi --- docs/_extensions/json_output/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_extensions/json_output/README.md b/docs/_extensions/json_output/README.md index 118021d22b..999fb7ebca 100644 --- a/docs/_extensions/json_output/README.md +++ b/docs/_extensions/json_output/README.md @@ -241,7 +241,7 @@ project = "NVIDIA NeMo Curator User Guide" release = "25.09" html_context = { - "product_name": "NeMo Curator", + "product_name": "NeMo Guardrails", "product_family": ["NeMo"], "site_name": "NVIDIA Technical Documentation", } From da4326347a2c345e0111384577e5a32d22490943 Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Fri, 16 Jan 2026 12:03:11 -0800 Subject: [PATCH 3/6] update readme --- 
docs/_extensions/json_output/README.md | 67 ++++++++++++++------------ 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/docs/_extensions/json_output/README.md b/docs/_extensions/json_output/README.md index 999fb7ebca..a0d966f433 100644 --- a/docs/_extensions/json_output/README.md +++ b/docs/_extensions/json_output/README.md @@ -6,7 +6,7 @@ Similar to Hugo's output formats, this creates parallel JSON files for each docu containing metadata, content, and other structured data that can be consumed by search engines, APIs, or other applications. -The main use case is generating comprehensive search indexes for tools like Solr, +The main use case is generating comprehensive search indexes for tools like Solr, Lunr.js, or custom search implementations. ## Search Index Integration @@ -24,50 +24,50 @@ The JSON structure includes search-optimized fields and global metadata from `co ```json { - "id": "guide/installation", - "title": "Installation Guide", - "url": "/guide/installation.html", - "last_modified": "2025-01-15T10:30:00Z", - + "id": "getting-started/installation-guide", + "title": "Installation Guide", + "url": "/getting-started/installation-guide.html", + "last_modified": "2026-01-15T10:30:00Z", + "book": { - "title": "NVIDIA NeMo Curator User Guide", - "version": "25.09" + "title": "NVIDIA NeMo Guardrails Library Developer Guide", + "version": "0.11.0" }, "product": { - "name": "NeMo Curator", + "name": "NeMo Guardrails", "family": ["NeMo"], - "version": "25.09" + "version": "0.11.0" }, "site": { "name": "NVIDIA Technical Documentation" }, - + "content": "Full markdown content here...", "content_length": 5420, "word_count": 850, "format": "text", "summary": "Quick summary for previews...", "doc_type": "tutorial", - "section_path": ["Guide", "Installation"], + "section_path": ["Getting Started", "Installation Guide"], "headings": [ {"text": "Prerequisites", "level": 2, "id": "prerequisites"} ], "headings_text": "Prerequisites Installation Steps 
Troubleshooting", - "keywords": ["install", "setup", "prerequisites", "docker", "python"], + "keywords": ["install", "setup", "prerequisites", "pip", "python", "guardrails"], "code_blocks": [ - {"content": "pip install package", "language": "bash"} + {"content": "pip install nemoguardrails", "language": "bash"} ], "links": [ { "text": "Configuration Guide", - "url": "/guide/config.html", + "url": "/configure-rails/index.html", "type": "cross_reference", "ref_type": "doc", - "target_doc": "guide/config" + "target_doc": "configure-rails/index" }, { "text": "GitHub Repository", - "url": "https://github.com/NVIDIA/...", + "url": "https://github.com/NVIDIA/NeMo-Guardrails", "type": "external" } ], @@ -194,14 +194,14 @@ The extension can inject site-wide metadata from `conf.py` into every JSON file, By default, the extension auto-infers global metadata from standard Sphinx configuration: -| JSON Field | Source | -|------------|--------| -| `book.title` | `project` | -| `book.version` | `release` | -| `product.name` | Extracted from `project` (strips "NVIDIA" prefix and doc suffixes) | -| `product.version` | `release` | -| `product.family` | `html_context["product_family"]` (if set) | -| `site.name` | `html_context["site_name"]` (if set) | +| JSON Field | Source | Example | +|------------|--------|---------| +| `book.title` | `project` | "NVIDIA NeMo Guardrails Library Developer Guide" | +| `book.version` | `release` | "0.11.0" | +| `product.name` | Extracted from `project` (strips "NVIDIA" prefix and doc suffixes) | "NeMo Guardrails" | +| `product.version` | `release` | "0.11.0" | +| `product.family` | `html_context["product_family"]` (if set) | ["NeMo"] | +| `site.name` | `html_context["site_name"]` (if set) | "NVIDIA Technical Documentation" | ### Explicit Configuration @@ -209,8 +209,8 @@ For full control, provide explicit `global_metadata`: ```python # conf.py -project = "NVIDIA NeMo Curator User Guide" -release = "25.09" +project = "NVIDIA NeMo Guardrails Library 
Developer Guide" +release = "0.11.0" json_output_settings = { "enabled": True, @@ -220,7 +220,7 @@ json_output_settings = { "version": release, }, "product": { - "name": "NeMo Curator", + "name": "NeMo Guardrails", "family": ["NeMo"], "version": release, }, @@ -237,8 +237,8 @@ You can also set values via `html_context` for auto-inference: ```python # conf.py -project = "NVIDIA NeMo Curator User Guide" -release = "25.09" +project = "NVIDIA NeMo Guardrails Library Developer Guide" +release = "0.11.0" html_context = { "product_name": "NeMo Guardrails", @@ -269,12 +269,15 @@ json_output_settings = { This extension automatically respects content gating rules set by the content_gating extension at multiple levels: ### Document-Level Gating + Documents with 'only' conditions in frontmatter that fail evaluation (e.g., 'only: not ga' when building with -t ga) will be excluded from JSON generation entirely, ensuring sensitive content doesn't leak into search indexes. -### Content-Level Gating +### Content-Level Gating + Content sections wrapped in `{conditional}` directives are also properly filtered. When conditions don't match, the content is excluded from the document tree and won't appear in the generated JSON. ### Integration Details + - **Automatic Detection**: Detects if content_gating extension is loaded - **Exclude Pattern Sync**: Respects documents added to exclude_patterns by content gating - **Build Tag Awareness**: Logs current build tags for debugging @@ -289,4 +292,4 @@ The integration works seamlessly - just enable both extensions and your JSON out 3. **Set content length limits** for large documentation sites 4. **Enable content filtering** to reduce JSON file sizes 5. **Use batch processing** to control memory usage -6. **Skip large files** to avoid processing massive documents \ No newline at end of file +6. 
**Skip large files** to avoid processing massive documents From fcdbb59fcdd0a88086bc1299d54c4df130509cec Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Fri, 16 Jan 2026 12:04:07 -0800 Subject: [PATCH 4/6] add DORI search assets --- docs/_extensions/search_assets/__init__.py | 187 +++ .../search_assets/enhanced-search.css | 1370 +++++++++++++++++ docs/_extensions/search_assets/main.js | 197 +++ .../search_assets/modules/DocumentLoader.js | 239 +++ .../search_assets/modules/EventHandler.js | 298 ++++ .../search_assets/modules/ResultRenderer.js | 263 ++++ .../search_assets/modules/SearchEngine.js | 817 ++++++++++ .../search_assets/modules/SearchInterface.js | 615 ++++++++ .../modules/SearchPageManager.js | 1204 +++++++++++++++ .../search_assets/modules/Utils.js | 148 ++ .../search_assets/templates/search.html | 49 + docs/conf.py | 1 + 12 files changed, 5388 insertions(+) create mode 100644 docs/_extensions/search_assets/__init__.py create mode 100644 docs/_extensions/search_assets/enhanced-search.css create mode 100644 docs/_extensions/search_assets/main.js create mode 100644 docs/_extensions/search_assets/modules/DocumentLoader.js create mode 100644 docs/_extensions/search_assets/modules/EventHandler.js create mode 100644 docs/_extensions/search_assets/modules/ResultRenderer.js create mode 100644 docs/_extensions/search_assets/modules/SearchEngine.js create mode 100644 docs/_extensions/search_assets/modules/SearchInterface.js create mode 100644 docs/_extensions/search_assets/modules/SearchPageManager.js create mode 100644 docs/_extensions/search_assets/modules/Utils.js create mode 100644 docs/_extensions/search_assets/templates/search.html diff --git a/docs/_extensions/search_assets/__init__.py b/docs/_extensions/search_assets/__init__.py new file mode 100644 index 0000000000..08cac4e9da --- /dev/null +++ b/docs/_extensions/search_assets/__init__.py @@ -0,0 +1,187 @@ +""" +Enhanced Search Extension for Sphinx +Provides enhanced search page functionality without 
def bundle_javascript_modules(extension_dir: str, output_path: str, minify: bool = False) -> None:
    """Bundle all JavaScript modules into a single file.

    Modules are concatenated in dependency order (utilities first, main.js
    last); missing modules are logged and skipped. Pass ``minify=True`` for
    basic comment/whitespace stripping.
    """
    # Define the module loading order (dependencies first)
    module_files = [
        ("modules", "Utils.js"),
        ("modules", "DocumentLoader.js"),
        ("modules", "SearchEngine.js"),
        ("modules", "SearchInterface.js"),
        ("modules", "ResultRenderer.js"),
        ("modules", "EventHandler.js"),
        ("modules", "SearchPageManager.js"),
        ("", "main.js"),  # Main file in root
    ]

    bundled_content = [
        "// Enhanced Search Bundle - Generated automatically",
        "// Contains: Utils, DocumentLoader, SearchEngine, SearchInterface, ResultRenderer, EventHandler, SearchPageManager, main",
        "",
    ]

    for subdir, filename in module_files:
        module_path = os.path.join(extension_dir, subdir, filename) if subdir else os.path.join(extension_dir, filename)

        if not os.path.exists(module_path):
            logger.warning(f"Module not found for bundling: {module_path}")
            continue

        with open(module_path, encoding="utf-8") as f:
            content = f.read()

        # Remove dynamic module-loading calls since everything is bundled inline.
        content = content.replace("await this.loadModules();", "// Modules bundled - no loading needed")
        content = content.replace(
            "await this.loadModuleWithFallback(name)", "// Modules bundled - no loading needed"
        )

        if minify:
            # Basic minification only. NOTE(review): the single-line-comment
            # regex strips only comments that start a line, so "//" inside
            # mid-line string literals/URLs survives — but so do mid-line
            # trailing comments.
            content = re.sub(r"^\s*//.*$", "", content, flags=re.MULTILINE)
            content = re.sub(r"/\*.*?\*/", "", content, flags=re.DOTALL)
            content = re.sub(r"\n\s*\n", "\n", content)
            content = re.sub(r"^\s+", "", content, flags=re.MULTILINE)

        # FIX: these previously emitted the literal placeholder "(unknown)"
        # instead of interpolating the module's file name.
        bundled_content.append(f"// === {filename} ===")
        bundled_content.append(content)
        bundled_content.append("")

        logger.info(f"Bundled: {filename}")

    # Write the bundled file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(bundled_content))

    size_kb = os.path.getsize(output_path) / 1024
    logger.info(f"Enhanced Search JavaScript bundle created: {output_path} ({size_kb:.1f}KB)")


def _copy_static_css(extension_dir: str, static_path: str) -> None:
    """Copy the extension stylesheet into _static if it exists.

    Shared by copy_assets and copy_assets_early (previously duplicated).
    """
    css_file = os.path.join(extension_dir, "enhanced-search.css")
    if os.path.exists(css_file):
        shutil.copy2(css_file, os.path.join(static_path, "enhanced-search.css"))
        logger.info("Enhanced search CSS copied")


def add_template_path(_app: Sphinx, config: Config) -> None:
    """Add this extension's templates directory during config initialization."""
    extension_dir = os.path.dirname(os.path.abspath(__file__))
    templates_path = os.path.join(extension_dir, "templates")

    if not os.path.exists(templates_path):
        return

    # templates_path may be a tuple or unset; normalize to a mutable list.
    if not isinstance(config.templates_path, list):
        config.templates_path = list(config.templates_path) if config.templates_path else []

    if templates_path not in config.templates_path:
        config.templates_path.append(templates_path)
        logger.info(f"Enhanced search templates added: {templates_path}")


def copy_assets(app: Sphinx, exc: Exception | None) -> None:
    """Copy assets to _static after a successful build (build-finished backup)."""
    if exc is not None:  # Only run if build succeeded
        return

    extension_dir = os.path.dirname(os.path.abspath(__file__))
    static_path = os.path.join(app.outdir, "_static")
    os.makedirs(static_path, exist_ok=True)

    _copy_static_css(extension_dir, static_path)

    # Copy main JavaScript file
    main_js = os.path.join(extension_dir, "main.js")
    if os.path.exists(main_js):
        shutil.copy2(main_js, os.path.join(static_path, "main.js"))
        logger.info("Enhanced search main.js copied")

    # Copy individual module files
    modules_dir = os.path.join(extension_dir, "modules")
    if os.path.exists(modules_dir):
        modules_static_dir = os.path.join(static_path, "modules")
        os.makedirs(modules_static_dir, exist_ok=True)
        for module_file in os.listdir(modules_dir):
            if module_file.endswith(".js"):
                shutil.copy2(os.path.join(modules_dir, module_file), os.path.join(modules_static_dir, module_file))
        logger.info("Enhanced search modules copied")


def copy_assets_early(app: Sphinx, _docname: str, _source: list[str]) -> None:
    """Bundle assets into _static early in the build (source-read), at most once.

    Creates the single bundled JavaScript file instead of copying individual
    modules; an attribute on ``app`` guards against repeated work.
    """
    if hasattr(app, "_search_assets_copied"):
        return

    extension_dir = os.path.dirname(os.path.abspath(__file__))
    static_path = os.path.join(app.outdir, "_static")
    os.makedirs(static_path, exist_ok=True)

    _copy_static_css(extension_dir, static_path)

    bundle_path = os.path.join(static_path, "search-assets.bundle.js")
    bundle_javascript_modules(extension_dir, bundle_path)

    # Mark as copied so subsequent source-read events are no-ops.
    app._search_assets_copied = True


def setup(app: Sphinx) -> dict[str, Any]:
    """Register the enhanced search extension with Sphinx."""
    extension_dir = os.path.dirname(os.path.abspath(__file__))

    # Template path must be in place before builders read config.
    app.connect("config-inited", add_template_path)

    # Copy assets early in the build process so JS modules are available.
    app.connect("source-read", copy_assets_early)

    css_file = os.path.join(extension_dir, "enhanced-search.css")
    if os.path.exists(css_file):
        app.add_css_file("enhanced-search.css")
        logger.info("Enhanced search CSS loaded")
    else:
        logger.warning(f"Enhanced search CSS not found at {css_file}")

    # Add the bundled JavaScript file (contains all modules).
    app.add_js_file("search-assets.bundle.js")
    logger.info("Enhanced search bundled JS will be loaded")

    # Connect to build events (backup copy path).
    app.connect("build-finished", copy_assets)

    return {
        "version": "2.0.0",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
1.25rem; +} + +/* Filter Header */ +.filter-header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 1rem; + padding-bottom: 0.75rem; + border-bottom: 1px solid var(--pst-color-on-surface); +} + +.filter-header-left { + display: flex; + align-items: center; + gap: 0.5rem; +} + +.filter-header-icon { + color: var(--nv-color-green); + font-size: 0.875rem; +} + +.filter-header-title { + font-size: 0.8125rem; + font-weight: 600; + color: var(--pst-color-text-base); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.active-filter-count { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 1.25rem; + height: 1.25rem; + padding: 0 0.375rem; + font-size: 0.6875rem; + font-weight: 700; + color: white; + background: var(--nv-color-green); + border-radius: 1rem; +} + +.filter-clear-btn { + display: inline-flex; + align-items: center; + gap: 0.375rem; + padding: 0.375rem 0.75rem; + font-size: 0.75rem; + font-weight: 500; + color: var(--pst-color-text-muted); + background: transparent; + border: 1px solid var(--pst-color-on-surface); + border-radius: 0.375rem; + cursor: pointer; + transition: all 0.2s ease; +} + +.filter-clear-btn:hover { + color: var(--pst-color-text-base); + background: var(--pst-color-surface); + border-color: var(--pst-color-text-muted); +} + +.filter-clear-btn.hidden { + opacity: 0; + pointer-events: none; +} + +/* Filter Grid */ +.filter-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + gap: 1rem; +} + +.filter-group { + display: flex; + flex-direction: column; + gap: 0.375rem; +} + +.filter-label { + display: flex; + align-items: center; + gap: 0.375rem; + font-size: 0.6875rem; + font-weight: 600; + color: var(--pst-color-text-muted); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.filter-label i { + font-size: 0.625rem; + color: var(--pst-color-text-muted); + opacity: 0.7; +} + +/* Filter Select Wrapper */ 
+.filter-select-wrapper { + position: relative; + display: flex; + align-items: center; +} + +.filter-select-wrapper.has-value { + --select-border-color: var(--nv-color-green); + --select-bg-color: rgba(118, 185, 0, 0.05); +} + +.filter-select { + width: 100%; + padding: 0.5rem 2rem 0.5rem 0.75rem; + font-size: 0.8125rem; + font-family: var(--pst-font-family-base); + color: var(--pst-color-text-base); + background-color: var(--select-bg-color, var(--pst-color-background)); + border: 1px solid var(--select-border-color, var(--pst-color-on-surface)); + border-radius: 0.5rem; + outline: none; + appearance: none; + cursor: pointer; + transition: all 0.2s ease; + text-overflow: ellipsis; +} + +.filter-select:focus { + border-color: var(--nv-color-green); + box-shadow: 0 0 0 3px rgba(118, 185, 0, 0.15); +} + +.filter-select:hover:not(:focus) { + border-color: var(--pst-color-text-muted); + background-color: var(--pst-color-surface); +} + +.filter-select-arrow { + position: absolute; + right: 0.625rem; + font-size: 0.625rem; + color: var(--pst-color-text-muted); + pointer-events: none; + transition: transform 0.2s ease; +} + +.filter-select:focus+.filter-select-arrow { + color: var(--nv-color-green); +} + +.filter-select option { + background-color: var(--pst-color-background); + color: var(--pst-color-text-base); + padding: 0.5rem; +} + +/* Search Input Wrapper */ +.search-input-wrapper { + position: relative; + display: flex; + align-items: center; +} + +.search-input-icon { + position: absolute; + left: 1rem; + font-size: 1rem; + color: var(--pst-color-text-muted); + pointer-events: none; + transition: color 0.2s ease; + z-index: 1; +} + +.search-input-field { + width: 100%; + padding: 0.875rem 1rem 0.875rem 2.75rem; + font-size: 1rem; + font-family: var(--pst-font-family-base); + font-weight: 400; + line-height: 1.5; + color: var(--pst-color-text-base); + background-color: var(--pst-color-background); + border: 2px solid var(--pst-color-on-surface); + border-radius: 
0.75rem; + outline: none; + transition: all 0.2s ease; +} + +.search-input-field:focus { + border-color: var(--nv-color-green); + box-shadow: 0 0 0 4px rgba(118, 185, 0, 0.12); +} + +.search-input-field:focus+.search-input-icon, +.search-input-wrapper:focus-within .search-input-icon { + color: var(--nv-color-green); +} + +.search-input-field::placeholder { + color: var(--pst-color-text-muted); + opacity: 0.8; +} + +/* Legacy filter-row support */ +.filter-row { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + gap: 1rem; + align-items: end; +} + +.filter-actions { + display: flex; + align-items: center; + gap: 0.5rem; + justify-self: end; +} + +.btn { + display: inline-flex; + align-items: center; + gap: 0.5rem; + padding: 0.5rem 1rem; + font-size: 0.875rem; + font-weight: 500; + font-family: var(--pst-font-family-base); + text-decoration: none; + border-radius: 0.25rem; + border: 1px solid transparent; + cursor: pointer; + transition: all 0.15s ease-in-out; +} + +.btn-sm { + padding: 0.375rem 0.75rem; + font-size: 0.8125rem; +} + +.btn-secondary { + color: var(--pst-color-text-base); + background-color: transparent; + border-color: var(--pst-color-on-surface); +} + +.btn-secondary:hover { + color: var(--pst-color-background); + background-color: var(--pst-color-text-base); + border-color: var(--pst-color-text-base); +} + +.btn-secondary:focus { + color: var(--pst-color-text-base); + background-color: transparent; + border-color: var(--nv-color-green); + box-shadow: 0 0 0 0.2rem rgba(118, 185, 0, 0.25); +} + +.btn-outline-secondary { + color: var(--pst-color-text-base); + background-color: transparent; + border-color: var(--pst-color-on-surface); +} + +.btn-outline-secondary:hover { + color: var(--pst-color-background); + background-color: var(--pst-color-text-base); + border-color: var(--pst-color-text-base); +} + +/* Responsive filters */ +@media (max-width: 900px) { + .filter-grid { + grid-template-columns: repeat(3, 1fr); + } +} 
+ +@media (max-width: 768px) { + .search-controls-container { + padding: 1rem; + border-radius: 0.75rem; + } + + .search-filters { + margin-bottom: 1rem; + } + + .filter-header { + flex-wrap: wrap; + gap: 0.75rem; + } + + .filter-grid { + grid-template-columns: repeat(2, 1fr); + gap: 0.75rem; + } + + .filter-group { + min-width: auto; + } + + .filter-actions { + grid-column: 1; + justify-self: center; + margin-top: 0.75rem; + } + + .search-input-field { + padding: 0.75rem 1rem 0.75rem 2.5rem; + font-size: 1rem; + } +} + +@media (max-width: 480px) { + .filter-grid { + grid-template-columns: 1fr; + } + + .filter-header-left { + flex: 1; + } +} + +/* Legacy input ID selector - now handled by .search-input-field */ +#enhanced-search-page-input { + width: 100%; + padding: 0.875rem 1rem 0.875rem 2.75rem; + font-size: 1rem; + font-family: var(--pst-font-family-base); + font-weight: 400; + line-height: 1.5; + color: var(--pst-color-text-base); + background-color: var(--pst-color-background); + border: 2px solid var(--pst-color-on-surface); + border-radius: 0.75rem; + outline: none; + transition: all 0.2s ease; +} + +.search-input-unified { + margin-top: 0 !important; +} + +#enhanced-search-page-input:focus { + border-color: var(--nv-color-green); + box-shadow: 0 0 0 4px rgba(118, 185, 0, 0.12); +} + +#enhanced-search-page-input::placeholder { + color: var(--pst-color-text-muted); + opacity: 0.8; +} + +.loading { + display: inline-block; + margin-left: 0.5rem; + color: var(--pst-color-text-muted); +} + +.spinner { + display: inline-block; + width: 1rem; + height: 1rem; + border: 0.125rem solid var(--pst-color-text-muted); + border-radius: 50%; + border-top-color: var(--nv-color-green); + animation: spin 1s ease-in-out infinite; +} + +@keyframes spin { + to { + transform: rotate(360deg); + } +} + +#search-results { + margin-top: 1.5rem; +} + +/* ===== SEARCH RESULTS STYLES ===== */ + +.search-results-header { + margin-bottom: 1.5rem; + padding-bottom: 1rem; + border-bottom: 
1px solid var(--pst-color-on-surface); +} + +.search-results-header h3 { + color: var(--pst-color-heading); + font-family: var(--pst-font-family-heading); + font-weight: var(--pst-font-weight-heading); + font-size: var(--pst-font-size-h3); + margin: 0 0 0.5rem 0; +} + +.search-results-header p { + color: var(--pst-color-text-muted); + font-size: 0.875rem; + margin: 0; +} + +/* Search Result Cards */ +.search-result { + background-color: var(--pst-color-background); + border: 1px solid var(--pst-color-on-surface); + border-radius: 0.5rem; + padding: 1.5rem; + margin-bottom: 1.5rem; + transition: all 0.2s ease-in-out; + position: relative; + overflow: hidden; +} + +.search-result::before { + content: ''; + position: absolute; + top: 0; + left: 0; + right: 0; + height: 4px; + background: linear-gradient(90deg, var(--nv-color-green), var(--nv-color-green-2)); + transform: scaleX(0); + transform-origin: left; + transition: transform 0.2s ease-in-out; +} + +.search-result:hover { + border-color: var(--nv-color-green); + box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.1); + transform: translateY(-0.125rem); +} + +.search-result:hover::before { + transform: scaleX(1); +} + +/* Result Header */ +.result-header { + display: flex; + align-items: flex-start; + gap: 1rem; + margin-bottom: 1rem; +} + +.section-icon { + flex-shrink: 0; + width: 3rem; + height: 3rem; + border-radius: 0.5rem; + display: flex; + align-items: center; + justify-content: center; + font-size: 1.25rem; + font-weight: 700; + color: var(--pst-color-background); + background: var(--nv-color-green); + border: 1px solid var(--pst-color-on-surface); +} + +.result-info { + flex-grow: 1; + min-width: 0; +} + +.result-title { + margin: 0 0 0.5rem 0; + font-family: var(--pst-font-family-heading); + font-weight: var(--pst-font-weight-heading); + font-size: var(--pst-font-size-h4); + line-height: 1.25; +} + +.result-title a { + color: var(--pst-color-heading); + text-decoration: none; + transition: color 0.15s 
ease-in-out; +} + +.result-title a:hover { + color: var(--nv-color-green); + text-decoration: underline; + text-decoration-color: var(--nv-color-green); + text-decoration-thickness: max(3px, 0.1875rem, 0.12em); +} + +/* Breadcrumb */ +.result-breadcrumb { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.875rem; + color: var(--pst-color-text-muted); + margin-bottom: 0.5rem; + font-family: var(--pst-font-family-base); +} + +.result-breadcrumb .breadcrumb-separator { + color: var(--pst-color-text-muted); + font-weight: 400; +} + +/* Meta Information */ +.result-meta { + display: flex; + align-items: center; + gap: 1rem; + flex-wrap: wrap; +} + +.section-badge { + display: inline-flex; + align-items: center; + gap: 0.25rem; + padding: 0.25rem 0.5rem; + background-color: var(--pst-color-background); + border: 1px solid var(--pst-color-on-surface); + border-radius: 1rem; + font-size: 0.75rem; + font-weight: 500; + color: var(--pst-color-text-base); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.relevance-score { + font-size: 0.75rem; + color: var(--pst-color-text-muted); + font-weight: 500; + font-family: var(--pst-font-family-monospace); +} + +/* Result Content */ +.result-content { + color: var(--pst-color-text-base); + font-family: var(--pst-font-family-base); + line-height: 1.6; + margin-bottom: 1rem; +} + +.result-content p { + margin: 0 0 0.75rem 0; +} + +.result-content p:last-child { + margin-bottom: 0; +} + +.result-summary { + color: var(--pst-color-text-base); + font-size: 0.9rem; + line-height: 1.5; + margin-bottom: 1rem; +} + +/* Matching Sections */ +.matching-sections { + margin-top: 1rem; + padding-top: 1rem; + border-top: 1px solid var(--pst-color-on-surface); +} + +.matching-sections h4, +.matching-sections h5 { + color: var(--pst-color-heading); + font-family: var(--pst-font-family-heading); + font-weight: 500; + font-size: 0.875rem; + text-transform: uppercase; + letter-spacing: 0.05em; + margin: 0 0 0.75rem 0; + 
display: flex; + align-items: center; + gap: 0.5rem; +} + +.section-links { + background-color: var(--pst-color-background); + border: 1px solid var(--pst-color-on-surface); + border-radius: 0.5rem; + padding: 0.75rem; +} + +.section-link { + display: flex; + align-items: center; + gap: 0.75rem; + padding: 0.5rem 0.75rem; + border-radius: 0.25rem; + font-size: 0.875rem; + color: var(--pst-color-text-base); + text-decoration: none; + transition: all 0.15s ease-in-out; + font-family: var(--pst-font-family-base); + margin-bottom: 0.25rem; +} + +.section-link:last-child { + margin-bottom: 0; +} + +.section-link:hover { + background-color: var(--nv-color-green); + color: var(--pst-color-background); + text-decoration: none; + transform: translateY(-0.0625rem); + box-shadow: 0 0.25rem 0.5rem rgba(118, 185, 0, 0.25); +} + +.section-link .section-icon { + width: 1.5rem; + height: 1.5rem; + font-size: 0.875rem; + background: var(--pst-color-surface); + color: var(--pst-color-primary); +} + +.section-link:hover .section-icon { + background: var(--pst-color-background); + color: var(--nv-color-green); +} + +/* Enhanced Result Features */ +.result-tag, +.result-category { + display: inline-flex; + align-items: center; + padding: 0.25rem 0.5rem; + font-size: 0.75rem; + font-weight: 500; + border-radius: 0.25rem; + text-decoration: none; + margin-right: 0.25rem; + margin-bottom: 0.25rem; +} + +.result-tag { + background-color: var(--pst-color-surface); + color: var(--pst-color-text-base); + border: 1px solid var(--pst-color-on-surface); + font-size: 0.75rem; + padding: 0.25rem 0.5rem; + border-radius: 0.25rem; + display: inline-block; + margin-right: 0.5rem; + margin-bottom: 0.25rem; +} + +.result-category { + background-color: rgba(118, 185, 0, 0.1); + color: var(--nv-color-green); + border: 1px solid rgba(118, 185, 0, 0.2); +} + +.multiple-matches-indicator { + display: inline-flex; + align-items: center; + padding: 0.25rem 0.5rem; + font-size: 0.75rem; + font-weight: 500; + 
color: var(--nv-color-green); + background-color: rgba(118, 185, 0, 0.1); + border-radius: 0.25rem; + border: 1px solid rgba(118, 185, 0, 0.2); + margin-left: 0.5rem; +} + +.more-tags, +.more-categories { + font-size: 0.75rem; + color: var(--pst-color-text-muted); + font-style: italic; + margin-left: 0.25rem; +} + +.result-tags, +.result-categories { + display: flex; + flex-wrap: wrap; + gap: 0.25rem; + align-items: center; +} + +/* Badge styles */ +.badge { + display: inline-flex; + align-items: center; + padding: 0.375rem 0.5rem; + font-size: 0.75rem; + font-weight: 500; + border-radius: 0.25rem; + text-decoration: none; +} + +.bg-secondary { + background-color: var(--pst-color-text-muted) !important; + color: var(--pst-color-background) !important; +} + +.bg-info { + background-color: rgba(118, 185, 0, 0.9) !important; + color: var(--pst-color-background) !important; +} + +.bg-light { + background-color: transparent !important; + color: var(--pst-color-text-muted) !important; + border: 1px solid var(--pst-color-on-surface) !important; +} + +/* Metadata badges */ +.metadata-badge { + display: inline-flex; + align-items: center; + padding: 0.2rem 0.5rem; + margin-right: 0.5rem; + margin-bottom: 0.25rem; + font-size: 0.75rem; + font-weight: 500; + border-radius: 0.375rem; + border: 1px solid; + cursor: help; + transition: all 0.2s ease; +} + +.persona-badge { + background-color: #e8f5e8; + color: #2d5a2d; + border-color: #c3e6c3; +} + +.difficulty-badge { + background-color: #fff3cd; + color: #856404; + border-color: #ffeaa7; +} + +.modality-badge { + background-color: #e2f3ff; + color: #0c5460; + border-color: #b8daff; +} + +.metadata-badge:hover { + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +/* Clickable badge styles */ +.clickable-badge { + cursor: pointer; + transition: all 0.2s ease; + user-select: none; +} + +.clickable-badge:hover { + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15); + filter: 
brightness(1.1); +} + +.clickable-badge:active { + transform: translateY(0); +} + +.result-tag.clickable-badge:hover { + background-color: var(--nv-color-green); + color: var(--pst-color-background); + border-color: var(--nv-color-green); +} + +/* Active filter display */ +.active-filters-display { + background-color: var(--pst-color-surface); + border: 1px solid var(--pst-color-on-surface); + border-radius: 0.375rem; + padding: 0.75rem; +} + +.active-filter-badge { + display: inline-flex; + align-items: center; + padding: 0.2rem 0.5rem; + margin-right: 0.5rem; + font-size: 0.75rem; + font-weight: 500; + border-radius: 0.25rem; + background-color: var(--nv-color-green); + color: var(--pst-color-background); + border: 1px solid var(--nv-color-green); +} + +/* Utility classes for layout */ +.mb-1 { + margin-bottom: 0.25rem !important; +} + +.mb-2 { + margin-bottom: 0.5rem !important; +} + +.mb-3 { + margin-bottom: 1rem !important; +} + +.mb-4 { + margin-bottom: 1.5rem !important; +} + +.mt-1 { + margin-top: 0.25rem !important; +} + +.mt-3 { + margin-top: 1rem !important; +} + +.me-1 { + margin-right: 0.25rem !important; +} + +.me-2 { + margin-right: 0.5rem !important; +} + +.me-3 { + margin-right: 1rem !important; +} + +.ms-2 { + margin-left: 0.5rem !important; +} + +.ms-4 { + margin-left: 1.5rem !important; +} + +.d-flex { + display: flex !important; +} + +.align-items-center { + align-items: center !important; +} + +.align-items-start { + align-items: flex-start !important; +} + +.flex-grow-1 { + flex-grow: 1 !important; +} + +.flex-wrap { + flex-wrap: wrap !important; +} + +.gap-2 { + gap: 0.5rem !important; +} + +.text-decoration-none { + text-decoration: none !important; +} + +.text-center { + text-align: center !important; +} + +.text-muted { + color: var(--pst-color-text-muted) !important; +} + +.py-4 { + padding-top: 1.5rem !important; + padding-bottom: 1.5rem !important; +} + +.p-2 { + padding: 0.5rem !important; +} + +.border { + border: 1px solid 
var(--pst-color-on-surface) !important; +} + +.rounded { + border-radius: 0.25rem !important; +} + +.small { + font-size: 0.875rem !important; +} + +/* Empty and Error States */ +.no-results { + text-align: center; + padding: 3rem 1rem; + color: var(--pst-color-text-muted); + font-family: var(--pst-font-family-base); +} + +.no-results h3 { + color: var(--pst-color-heading); + font-family: var(--pst-font-family-heading); + font-weight: var(--pst-font-weight-heading); + font-size: var(--pst-font-size-h3); + margin: 0 0 1rem 0; +} + +.no-results p { + font-size: 1.125rem; + line-height: 1.6; + margin: 0; +} + +.error-message { + background-color: var(--pst-color-surface); + border: 1px solid var(--pst-color-on-surface); + border-left: 4px solid var(--nv-color-green); + border-radius: 0.5rem; + padding: 1rem; + margin: 1rem 0; + color: var(--pst-color-text-base); + font-family: var(--pst-font-family-base); +} + +/* Search Highlighting */ +.search-highlight, +mark { + background-color: rgba(118, 185, 0, 0.2); + color: var(--pst-color-text-base); + padding: 0.0625rem 0.125rem; + border-radius: 0.125rem; + font-weight: 400; + border: 1px solid rgba(118, 185, 0, 0.3); +} + +/* Section-specific icon colors and styles */ +.section-badge.getting-started { + background: linear-gradient(135deg, var(--nv-color-green), var(--nv-color-green-2)); + color: var(--pst-color-background); + border-color: var(--nv-color-green); +} + +.section-badge.admin { + background-color: var(--pst-color-surface); + color: var(--pst-color-text-base); +} + +.section-badge.reference { + background-color: var(--pst-color-surface); + color: var(--pst-color-text-base); +} + +.section-badge.tutorial { + background-color: var(--pst-color-surface); + color: var(--pst-color-text-base); +} + +/* Empty state icons and messaging */ +.search-empty-state, +.search-no-results { + text-align: center; + padding: 2rem; + color: var(--pst-color-text-muted); + font-family: var(--pst-font-family-base); +} + 
+.search-empty-state i, +.search-no-results i { + font-size: 3rem; + color: var(--pst-color-text-muted); + margin-bottom: 1rem; + display: block; +} + +.search-empty-state h4, +.search-no-results h4 { + color: var(--pst-color-heading); + font-family: var(--pst-font-family-heading); + font-size: var(--pst-font-size-h4); + margin-bottom: 0.5rem; +} + +.search-empty-state p, +.search-no-results p { + color: var(--pst-color-text-muted); + font-size: 1rem; + line-height: 1.5; + margin-bottom: 1rem; +} + +/* Responsive Design */ +@media (max-width: 768px) { + .search-result { + padding: 1rem; + margin-bottom: 1rem; + } + + .result-header { + flex-direction: column; + gap: 0.75rem; + } + + .section-icon { + width: 2.5rem; + height: 2.5rem; + font-size: 1rem; + } + + .result-title { + font-size: var(--pst-font-size-h5); + } + + .result-meta { + flex-direction: column; + align-items: flex-start; + gap: 0.5rem; + } + + .section-links { + padding: 0.5rem; + } + + .section-link { + padding: 0.375rem 0.5rem; + font-size: 0.8125rem; + } + + #enhanced-search-page-input { + font-size: 1rem; + padding: 0.875rem 1rem; + } +} + +/* High contrast mode support */ +@media (prefers-contrast: high) { + .search-result { + border-width: 2px; + } + + .search-result:hover { + border-width: 3px; + } + + .search-highlight, + mark { + outline: 1px solid var(--pst-color-text-base); + } +} + +/* Reduced motion support */ +@media (prefers-reduced-motion: reduce) { + + .search-result, + .section-link, + #enhanced-search-page-input, + .search-result::before { + transition: none; + } + + .spinner { + animation: none; + } +} + +/* Print styles */ +@media print { + .search-result { + break-inside: avoid; + box-shadow: none; + border: 1px solid; + margin-bottom: 1rem; + background: transparent !important; + } + + .section-icon { + background: transparent !important; + border: 1px solid; + } + + .section-link { + text-decoration: underline !important; + } + + .search-highlight, + mark { + background: 
transparent !important; + text-decoration: underline; + font-weight: bold; + } +} + +/* Focus states for accessibility */ +#enhanced-search-page-input:focus-visible { + outline: 2px solid var(--nv-color-green); + outline-offset: 2px; +} + +.section-link:focus-visible { + outline: 2px solid var(--nv-color-green); + outline-offset: 2px; +} + +.result-title a:focus-visible { + outline: 2px solid var(--nv-color-green); + outline-offset: 2px; + border-radius: 0.125rem; +} + +/* Dark theme support */ +html[data-theme="dark"] .search-result { + background: var(--pst-color-surface-200, #1f2937); +} + +html[data-theme="dark"] .search-result:hover { + background: var(--pst-color-surface-300, #111827); +} + +html[data-theme="dark"] .search-results-header h3 { + color: var(--pst-color-text-base, #f9fafb); +} + +/* Accessibility enhancements */ +@media (prefers-reduced-motion: reduce) { + + .search-result, + .section-link, + #enhanced-search-page-input { + transition: none; + } +} + +@media (prefers-contrast: high) { + .search-result { + border-color: var(--pst-color-text-base); + } + + .search-highlight, + mark { + background: var(--nv-color-green); + color: var(--pst-color-background); + } +} + +/* AI Assistant container styling */ +.ai-assistant-container { + border: 1px solid var(--pst-color-border); + border-radius: var(--pst-border-radius); + background: var(--pst-color-surface); + padding: 1rem; + margin-top: 1.5rem; +} + +.ai-assistant-container .ai-loading { + text-align: center; + padding: 2rem; + color: var(--pst-color-text-muted); +} + +.ai-assistant-container .ai-response { + line-height: 1.6; +} + +.ai-assistant-container .ai-error { + color: var(--pst-color-danger); + background: var(--pst-color-danger-bg); + padding: 1rem; + border-radius: var(--pst-border-radius); + border-left: 4px solid var(--pst-color-danger); +} + +/* AI Assistant dark theme support */ +html[data-theme="dark"] .ai-assistant-container { + background: var(--pst-color-surface-200, #1f2937); + 
border-color: var(--pst-color-border-dark, #374151); +} + +/* ===== TOPIC BADGES ===== */ +.result-topics { + display: flex; + flex-wrap: wrap; + gap: 0.25rem; + align-items: center; +} + +.topic-badge { + display: inline-flex; + align-items: center; + padding: 0.25rem 0.5rem; + margin-right: 0.25rem; + font-size: 0.75rem; + background: var(--topic-bg, #e8f5e9); + color: var(--topic-text, #2e7d32); + border-radius: 4px; + cursor: pointer; + transition: all 0.15s ease-in-out; + border: 1px solid rgba(46, 125, 50, 0.2); +} + +.topic-badge:hover { + background: var(--topic-bg-hover, #c8e6c9); + transform: translateY(-1px); + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.topic-badge:active { + transform: translateY(0); +} + +/* Dark theme topic badges */ +html[data-theme="dark"] .topic-badge { + background: rgba(118, 185, 0, 0.15); + color: var(--nv-color-green); + border-color: rgba(118, 185, 0, 0.3); +} + +html[data-theme="dark"] .topic-badge:hover { + background: rgba(118, 185, 0, 0.25); +} + +/* More Topics Indicator */ +.more-topics { + font-size: 0.75rem; + color: var(--pst-color-text-muted); + padding: 0.25rem; + font-style: italic; +} + +/* ===== RESULT BREAKDOWN ===== */ +.result-breakdown { + margin-left: 0.5rem; + font-size: 0.875rem; + color: var(--pst-color-text-muted); +} + +.result-breakdown::before { + content: '— '; +} + +/* ===== KEYBOARD NAVIGATION FOCUS STATES ===== */ +.search-result.focused { + outline: 2px solid var(--nv-color-green, #76b900); + outline-offset: 2px; + border-radius: 8px; + background-color: rgba(118, 185, 0, 0.05); +} + +.search-result:focus-visible { + outline: 2px solid var(--nv-color-green, #76b900); + outline-offset: 2px; +} + +/* Animation for focus transition */ +.search-result { + transition: outline 0.15s ease-in-out, background-color 0.15s ease-in-out, transform 0.2s ease-in-out, border-color 0.2s ease-in-out, box-shadow 0.2s ease-in-out; +} + +/* Dark theme focus states */ +html[data-theme="dark"] 
.search-result.focused { + background-color: rgba(118, 185, 0, 0.1); + outline-color: var(--nv-color-green); +} + +/* ===== EXTENDED FILTER GRID RESPONSIVE ===== */ +@media (max-width: 1200px) { + .filter-grid { + grid-template-columns: repeat(3, 1fr); + } +} + +/* Dark theme filter enhancements */ +html[data-theme="dark"] .search-controls-container { + background: linear-gradient(to bottom, var(--pst-color-surface-200, #1f2937), var(--pst-color-surface-300, #111827)); + border-color: var(--pst-color-border-dark, #374151); +} + +html[data-theme="dark"] .filter-header { + border-bottom-color: var(--pst-color-border-dark, #374151); +} + +html[data-theme="dark"] .filter-select { + background-color: var(--pst-color-surface-200, #1f2937); + border-color: var(--pst-color-border-dark, #374151); +} + +html[data-theme="dark"] .filter-select:hover:not(:focus) { + background-color: var(--pst-color-surface-300, #111827); +} + +html[data-theme="dark"] .filter-clear-btn { + border-color: var(--pst-color-border-dark, #374151); +} + +html[data-theme="dark"] .filter-clear-btn:hover { + background-color: var(--pst-color-surface-300, #111827); +} + +html[data-theme="dark"] .search-input-field, +html[data-theme="dark"] #enhanced-search-page-input { + background-color: var(--pst-color-surface-200, #1f2937); + border-color: var(--pst-color-border-dark, #374151); +} + +html[data-theme="dark"] .filter-select-wrapper.has-value { + --select-bg-color: rgba(118, 185, 0, 0.1); +} + +/* ===== ACCESSIBILITY SKIP LINK ===== */ +.sr-only { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border: 0; +} + +/* Visual indicator for keyboard users */ +.search-results-list:focus-within { + outline: 1px dashed var(--pst-color-text-muted); + outline-offset: 4px; + border-radius: 8px; +} + +/* Reduced motion support for keyboard navigation */ +@media (prefers-reduced-motion: reduce) { + 
.search-result.focused { + transition: none; + } +} \ No newline at end of file diff --git a/docs/_extensions/search_assets/main.js b/docs/_extensions/search_assets/main.js new file mode 100644 index 0000000000..2fc23965eb --- /dev/null +++ b/docs/_extensions/search_assets/main.js @@ -0,0 +1,197 @@ +/** + * Enhanced Search Main Entry Point + * Loads search engine and page manager for enhanced search page + * Does NOT interfere with default search behavior + */ + +// Prevent multiple initializations +if (typeof window.EnhancedSearch !== 'undefined') { +} else { + +// Import modules (will be loaded dynamically) +class EnhancedSearch { + constructor(options = {}) { + this.options = { + placeholder: options.placeholder || 'Search documentation...', + maxResults: options.maxResults || 20, + minQueryLength: 2, + highlightClass: 'search-highlight', + ...options + }; + + this.isLoaded = false; + + // Module instances + this.documentLoader = null; + this.searchEngine = null; + this.searchPageManager = null; + this.utils = null; + + this.init(); + } + + async init() { + try { + // Load required modules + await this.loadModules(); + + // Initialize core modules + this.utils = new Utils(); + this.documentLoader = new DocumentLoader(); + this.searchEngine = new SearchEngine(this.utils); + + // Load documents and initialize search engine (always needed) + await this.documentLoader.loadDocuments(); + await this.searchEngine.initialize(this.documentLoader.getDocuments()); + + // Check if we're on the search page + const isSearchPage = this.isSearchPage(); + + if (isSearchPage) { + this.searchPageManager = new SearchPageManager(); + } + + this.isLoaded = true; + } catch (error) { + this.fallbackToDefaultSearch(); + } + } + + isSearchPage() { + return window.location.pathname.includes('/search') || + window.location.pathname.includes('/search.html') || + window.location.pathname.endsWith('search/') || + document.querySelector('#enhanced-search-page-input') !== null || + 
document.querySelector('#enhanced-search-page-results') !== null; + } + + async loadModules() { + const moduleNames = [ + 'Utils', + 'DocumentLoader', + 'SearchEngine', + 'SearchPageManager' + ]; + + // Load modules with smart path resolution + const modulePromises = moduleNames.map(name => + this.loadModuleWithFallback(name) + ); + + await Promise.all(modulePromises); + } + + async loadModuleWithFallback(moduleName) { + const possiblePaths = this.getModulePaths(moduleName); + + for (const path of possiblePaths) { + try { + await this.loadModule(path); + return; + } catch (error) { + // Continue to next path + } + } + + throw new Error(`Failed to load module ${moduleName} from any path`); + } + + getModulePaths(moduleName) { + const fileName = `${moduleName}.js`; + + // Calculate nesting level to determine correct _static path + const pathParts = window.location.pathname.split('/').filter(part => part.length > 0); + const htmlFile = pathParts[pathParts.length - 1]; + + // Remove the HTML file from the count if it exists + let nestingLevel = pathParts.length; + if (htmlFile && htmlFile.endsWith('.html')) { + nestingLevel--; + } + + // Build the correct _static path based on nesting level + const staticPrefix = nestingLevel > 0 ? '../'.repeat(nestingLevel) : './'; + const staticPath = `${staticPrefix}_static`; + + // Search assets only has modules directory + const moduleDir = 'modules'; + + // Generate paths in order of likelihood + const paths = []; + + // 1. Most likely path based on calculated nesting + paths.push(`${staticPath}/${moduleDir}/${fileName}`); + + // 2. Fallback static paths (for different nesting scenarios) + paths.push(`_static/${moduleDir}/${fileName}`); + paths.push(`./_static/${moduleDir}/${fileName}`); + if (nestingLevel > 1) { + paths.push(`../_static/${moduleDir}/${fileName}`); + } + + // 3. 
Legacy fallback paths + paths.push(`./modules/${fileName}`); + paths.push(`../modules/${fileName}`); + paths.push(`modules/${fileName}`); + + return paths; + } + + async loadModule(src) { + return new Promise((resolve, reject) => { + const script = document.createElement('script'); + script.src = src; + script.onload = resolve; + script.onerror = () => reject(new Error(`Failed to load module: ${src}`)); + document.head.appendChild(script); + }); + } + + // Public API methods + search(query) { + if (!this.searchEngine) { + return []; + } + + return this.searchEngine.search(query); + } + + renderResults(results, query) { + // Use SearchPageManager for search page rendering + return ''; + } + + fallbackToDefaultSearch() { + // Don't interfere with default search - just fallback + } + + getDocuments() { + return this.documentLoader ? this.documentLoader.getDocuments() : []; + } + + get documents() { + return this.getDocuments(); + } + + getSearchEngine() { + return this.searchEngine; + } + + getOptions() { + return this.options; + } +} + +// Initialize the enhanced search system +window.EnhancedSearch = EnhancedSearch; + +// Auto-initialize +document.addEventListener('DOMContentLoaded', function() { + // Create the global instance + window.enhancedSearchInstance = new EnhancedSearch({ + placeholder: 'Search NVIDIA documentation...', + maxResults: 50 + }); +}); + +} // End of duplicate prevention check \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/DocumentLoader.js b/docs/_extensions/search_assets/modules/DocumentLoader.js new file mode 100644 index 0000000000..68f31a27b9 --- /dev/null +++ b/docs/_extensions/search_assets/modules/DocumentLoader.js @@ -0,0 +1,239 @@ +/** + * DocumentLoader Module + * Handles loading and managing search documents from JSON index + */ + +class DocumentLoader { + constructor() { + this.documents = {}; + this.isLoaded = false; + } + + /** + * Load documents from JSON index files + */ + async loadDocuments() 
{ + try { + const data = await this.fetchDocumentData(); + this.processDocuments(data); + this.isLoaded = true; + console.log(`✅ Document loader initialized with ${Object.keys(this.documents).length} documents`); + } catch (error) { + console.error('Failed to load search documents:', error); + throw error; + } + } + + /** + * Fetch document data from various possible paths + */ + async fetchDocumentData() { + // Try different paths to account for different page depths + const possiblePaths = [ + './index.json', + '../index.json', + '../../index.json', + '../../../index.json' + ]; + + for (const path of possiblePaths) { + try { + const response = await fetch(path); + if (response.ok) { + const data = await response.json(); + console.log(`✅ Loaded search index from: ${path}`); + return data; + } + } catch (error) { + console.log(`❌ Failed to load from ${path}: ${error.message}`); + } + } + + throw new Error('Failed to load search data from any path'); + } + + /** + * Process and filter documents from raw data + * Supports three formats: + * 1. Array of documents (new format): [{ id, title, ... }, ...] + * 2. Object with children (legacy): { children: [...] } + * 3. Single document (fallback): { id, title, ... 
} + */ + processDocuments(data) { + let allDocs; + if (Array.isArray(data)) { + // New format: root is an array of documents + allDocs = data; + } else if (data.children) { + // Legacy format: object with children array + allDocs = data.children; + } else { + // Fallback: single document + allDocs = [data]; + } + + // Filter out problematic documents + const filteredDocs = allDocs.filter(doc => this.isValidDocument(doc)); + + // Store documents by ID + filteredDocs.forEach(doc => { + this.documents[doc.id] = this.sanitizeDocument(doc); + }); + + console.log(`Processed ${filteredDocs.length} documents (filtered from ${allDocs.length} total)`); + } + + /** + * Check if a document is valid for indexing + */ + isValidDocument(doc) { + const docId = doc.id || ''; + return !docId.toLowerCase().includes('readme') && + !docId.startsWith('_') && + doc.title && + doc.content; + } + + /** + * Sanitize document content for safe indexing + * Supports both new schema fields and legacy fields + * Preserves dynamic facets as-is + */ + sanitizeDocument(doc) { + const sanitized = { + ...doc, + title: this.sanitizeText(doc.title, 200), + // Add description as separate indexed field (for improved search relevance) + description: this.sanitizeText(doc.description, 300), + content: this.sanitizeText(doc.content, 5000), + summary: this.sanitizeText(doc.summary, 500), + headings: this.sanitizeHeadings(doc.headings), + headings_text: this.sanitizeText(doc.headings_text, 1000), + keywords: this.sanitizeArray(doc.keywords, 300), + tags: this.sanitizeArray(doc.tags, 200), + // Support both topics (new) and categories (legacy) + topics: this.sanitizeArray(doc.topics || doc.categories, 200), + // Support both audience (new) and personas (legacy) + audience: this.sanitizeArray(doc.audience || doc.personas, 200), + // Content type and difficulty + content_type: this.sanitizeText(doc.content_type, 50), + difficulty: this.sanitizeText(doc.difficulty, 50), + doc_type: this.sanitizeText(doc.doc_type, 
50), + section_path: this.sanitizeArray(doc.section_path, 200), + author: this.sanitizeText(doc.author, 100) + }; + + // Preserve facets object (dynamic, user-defined keys) + if (doc.facets && typeof doc.facets === 'object') { + sanitized.facets = this.sanitizeFacets(doc.facets); + } + + // Preserve legacy flat modality if present and no facets.modality + if (doc.modality && (!doc.facets || !doc.facets.modality)) { + sanitized.modality = this.sanitizeText(doc.modality, 50); + } + + return sanitized; + } + + /** + * Sanitize facets object (dynamic keys with string or array values) + */ + sanitizeFacets(facets) { + const sanitized = {}; + Object.entries(facets).forEach(([key, value]) => { + if (Array.isArray(value)) { + sanitized[key] = value.map(v => String(v).substring(0, 100)); + } else if (value) { + sanitized[key] = String(value).substring(0, 100); + } + }); + return sanitized; + } + + /** + * Sanitize text content with length limits + */ + sanitizeText(text, maxLength) { + if (!text || typeof text !== 'string') return ''; + return text.substring(0, maxLength); + } + + /** + * Sanitize array content + */ + sanitizeArray(arr, maxLength) { + if (!Array.isArray(arr)) return []; + return arr.map(item => String(item)).join(' ').substring(0, maxLength); + } + + /** + * Sanitize headings array + */ + sanitizeHeadings(headings) { + if (!Array.isArray(headings)) return []; + return headings.map(heading => ({ + text: this.sanitizeText(heading.text, 200), + level: Number(heading.level) || 1 + })); + } + + /** + * Get all loaded documents + */ + getDocuments() { + return this.documents; + } + + /** + * Get a specific document by ID + */ + getDocument(id) { + return this.documents[id]; + } + + /** + * Get document count + */ + getDocumentCount() { + return Object.keys(this.documents).length; + } + + /** + * Check if documents are loaded + */ + isReady() { + return this.isLoaded && Object.keys(this.documents).length > 0; + } + + /** + * Get documents as array for indexing + 
*/ + getDocumentsArray() { + return Object.values(this.documents); + } + + /** + * Filter documents by criteria + */ + filterDocuments(filterFn) { + return this.getDocumentsArray().filter(filterFn); + } + + /** + * Get document statistics + */ + getStatistics() { + const docs = this.getDocumentsArray(); + return { + totalDocuments: docs.length, + documentsWithSummary: docs.filter(d => d.summary).length, + documentsWithHeadings: docs.filter(d => d.headings && d.headings.length > 0).length, + documentsWithTags: docs.filter(d => d.tags && d.tags.length > 0).length, + averageContentLength: docs.reduce((sum, d) => sum + (d.content?.length || 0), 0) / docs.length + }; + } +} + +// Make DocumentLoader available globally +window.DocumentLoader = DocumentLoader; \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/EventHandler.js b/docs/_extensions/search_assets/modules/EventHandler.js new file mode 100644 index 0000000000..f1981c4748 --- /dev/null +++ b/docs/_extensions/search_assets/modules/EventHandler.js @@ -0,0 +1,298 @@ +/** + * EventHandler Module + * Handles keyboard shortcuts and event management for the search interface + */ + +class EventHandler { + constructor(enhancedSearch) { + this.enhancedSearch = enhancedSearch; + this.searchInterface = enhancedSearch.searchInterface; + this.resultRenderer = enhancedSearch.resultRenderer; + this.searchEngine = enhancedSearch.searchEngine; + this.utils = enhancedSearch.utils; + + // Track bound event listeners for cleanup + this.boundListeners = new Map(); + + // Debounced search function + this.debouncedSearch = this.utils.debounce(this.handleSearch.bind(this), 200); + } + + /** + * Bind all event listeners + */ + bindEvents() { + this.bindInputEvents(); + this.bindModalEvents(); + this.bindGlobalEvents(); + console.log('✅ Event handlers bound'); + } + + /** + * Bind input-related events + */ + bindInputEvents() { + const input = this.searchInterface.getInput(); + if (!input) return; + + // Search 
input + const inputHandler = (e) => this.debouncedSearch(e); + input.addEventListener('input', inputHandler); + this.boundListeners.set('input', inputHandler); + + // Keyboard navigation + const keydownHandler = (e) => this.handleKeyDown(e); + input.addEventListener('keydown', keydownHandler); + this.boundListeners.set('keydown', keydownHandler); + } + + /** + * Bind page-specific events (replaces modal events) + */ + bindModalEvents() { + // Check if we're on the search page + if (!this.searchInterface.isSearchPage()) { + return; + } + + // Get query parameter if we're on search page + const urlParams = new URLSearchParams(window.location.search); + const query = urlParams.get('q'); + + if (query) { + // Perform search immediately with the query from URL + setTimeout(() => { + const input = this.searchInterface.getInput(); + if (input) { + input.value = query; + this.handleSearch({ target: input }); + } + }, 100); + } + } + + /** + * Bind global keyboard shortcuts + */ + bindGlobalEvents() { + const globalKeyHandler = (e) => { + // Ctrl+K or Cmd+K to focus search input + if ((e.ctrlKey || e.metaKey) && e.key === 'k') { + e.preventDefault(); + // Focus the search input if we're on the search page + const searchInput = this.searchInterface.getInput(); + if (searchInput) { + searchInput.focus(); + } else { + // If not on search page, redirect to search page + window.location.href = 'search.html'; + } + return; + } + }; + + document.addEventListener('keydown', globalKeyHandler); + this.boundListeners.set('global', globalKeyHandler); + } + + /** + * Handle search input + */ + async handleSearch(event) { + const query = event.target.value.trim(); + const resultsContainer = this.searchInterface.getResultsContainer(); + + if (query.length < this.enhancedSearch.options.minQueryLength) { + this.searchInterface.showEmptyState(); + this.searchInterface.clearStats(); + return; + } + + try { + // Show loading state + this.resultRenderer.renderLoading(resultsContainer); + + // 
Perform search + const results = this.searchEngine.search(query, this.enhancedSearch.options.maxResults); + const count = results.length; + + // Render results + this.resultRenderer.render(results, query, resultsContainer); + + // Update stats + this.searchInterface.updateStats(query, count); + + // Emit search event for AI Assistant extension if available + this.emitSearchEvent(query, results, count); + + } catch (error) { + console.error('Search error:', error); + this.resultRenderer.renderError(resultsContainer, 'Search temporarily unavailable'); + this.searchInterface.clearStats(); + } + } + + /** + * Handle keyboard navigation + */ + handleKeyDown(event) { + const resultsContainer = this.searchInterface.getResultsContainer(); + + switch (event.key) { + case 'ArrowDown': + event.preventDefault(); + this.resultRenderer.selectNext(resultsContainer); + break; + + case 'ArrowUp': + event.preventDefault(); + this.resultRenderer.selectPrevious(resultsContainer); + break; + + case 'Enter': + event.preventDefault(); + this.resultRenderer.activateSelected(resultsContainer); + break; + + case 'Escape': + event.preventDefault(); + this.enhancedSearch.hide(); + break; + } + } + + /** + * Emit search event for other extensions + */ + emitSearchEvent(query, results, count) { + if (window.AIAssistant && window.aiAssistantInstance) { + const searchEvent = new CustomEvent('enhanced-search-results', { + detail: { query, results, count } + }); + document.dispatchEvent(searchEvent); + } + } + + /** + * Handle window resize + */ + handleResize() { + // Adjust modal positioning if needed + const modal = this.searchInterface.getModal(); + if (modal && this.searchInterface.isModalVisible()) { + // Could add responsive adjustments here + } + } + + /** + * Handle focus management + */ + handleFocus(event) { + // Trap focus within modal when visible + if (this.searchInterface.isModalVisible()) { + const modal = this.searchInterface.getModal(); + const focusableElements = 
modal.querySelectorAll( + 'button, input, select, textarea, [tabindex]:not([tabindex="-1"])' + ); + + const firstFocusable = focusableElements[0]; + const lastFocusable = focusableElements[focusableElements.length - 1]; + + if (event.key === 'Tab') { + if (event.shiftKey) { + // Shift + Tab + if (document.activeElement === firstFocusable) { + event.preventDefault(); + lastFocusable.focus(); + } + } else { + // Tab + if (document.activeElement === lastFocusable) { + event.preventDefault(); + firstFocusable.focus(); + } + } + } + } + } + + /** + * Bind additional event listeners + */ + bindAdditionalEvents() { + // Window resize + const resizeHandler = this.utils.debounce(() => this.handleResize(), 100); + window.addEventListener('resize', resizeHandler); + this.boundListeners.set('resize', resizeHandler); + + // Focus trap + const focusHandler = (e) => this.handleFocus(e); + document.addEventListener('keydown', focusHandler); + this.boundListeners.set('focus', focusHandler); + } + + /** + * Unbind all event listeners + */ + unbindEvents() { + // Remove input events + const input = this.searchInterface.getInput(); + if (input && this.boundListeners.has('input')) { + input.removeEventListener('input', this.boundListeners.get('input')); + input.removeEventListener('keydown', this.boundListeners.get('keydown')); + } + + // Remove modal events + const closeBtn = this.searchInterface.getCloseButton(); + if (closeBtn && this.boundListeners.has('close')) { + closeBtn.removeEventListener('click', this.boundListeners.get('close')); + } + + const backdrop = this.searchInterface.getBackdrop(); + if (backdrop && this.boundListeners.has('backdrop')) { + backdrop.removeEventListener('click', this.boundListeners.get('backdrop')); + } + + // Remove global events + if (this.boundListeners.has('global')) { + document.removeEventListener('keydown', this.boundListeners.get('global')); + } + + if (this.boundListeners.has('resize')) { + window.removeEventListener('resize', 
this.boundListeners.get('resize')); + } + + if (this.boundListeners.has('focus')) { + document.removeEventListener('keydown', this.boundListeners.get('focus')); + } + + // Clear listeners map + this.boundListeners.clear(); + + console.log('✅ Event handlers unbound'); + } + + /** + * Get event handler statistics + */ + getStatistics() { + return { + boundListeners: this.boundListeners.size, + modalVisible: this.searchInterface.isModalVisible(), + hasInput: !!this.searchInterface.getInput(), + hasModal: !!this.searchInterface.getModal() + }; + } + + /** + * Check if events are properly bound + */ + isReady() { + return this.boundListeners.size > 0 && + this.searchInterface.getInput() !== null && + this.searchInterface.getModal() !== null; + } +} + +// Make EventHandler available globally +window.EventHandler = EventHandler; \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/ResultRenderer.js b/docs/_extensions/search_assets/modules/ResultRenderer.js new file mode 100644 index 0000000000..1a963da966 --- /dev/null +++ b/docs/_extensions/search_assets/modules/ResultRenderer.js @@ -0,0 +1,263 @@ +/** + * ResultRenderer Module + * Handles rendering of search results in the interface + */ + +class ResultRenderer { + constructor(options, utils) { + this.options = options; + this.utils = utils; + } + + /** + * Render search results + */ + render(results, query, container) { + if (!container) { + console.warn('No container provided for rendering results'); + return; + } + + if (results.length === 0) { + container.innerHTML = this.renderNoResults(query); + return; + } + + const html = results.map((result, index) => { + const isSelected = index === 0; + return this.renderResultItem(result, query, isSelected); + }).join(''); + + container.innerHTML = `
${html}
`; + + // Bind click events + this.bindResultEvents(container, results); + } + + /** + * Render a single result item + */ + renderResultItem(result, query, isSelected = false) { + const title = this.utils.highlightText(result.title || 'Untitled', query); + const summary = this.utils.highlightText(result.summary || result.content?.substring(0, 200) || '', query); + const breadcrumb = this.utils.generateBreadcrumb(result.id); + + // Render matching sections + const sectionsHtml = this.renderMatchingSections(result, query); + + // Show multiple matches indicator + const multipleMatchesIndicator = result.totalMatches > 1 + ? `${result.totalMatches} matches` + : ''; + + return ` +
+
+
${title} ${multipleMatchesIndicator}
+
${summary}...
+ ${sectionsHtml} +
+ ${breadcrumb} + ${result.tags ? `${this.utils.safeArray(result.tags).slice(0, 3).map(tag => `${tag}`).join('')}` : ''} +
+
+
+ +
+
+ `; + } + + /** + * Render matching sections within a result + */ + renderMatchingSections(result, query) { + if (!result.matchingSections || result.matchingSections.length <= 1) { + return ''; + } + + // Show only the first few sections to avoid overwhelming + const sectionsToShow = result.matchingSections.slice(0, 4); + const hasMore = result.matchingSections.length > 4; + + const sectionsHtml = sectionsToShow.map(section => { + const icon = this.utils.getSectionIcon(section.type, section.level); + const sectionText = this.utils.highlightText(section.text, query); + const anchor = section.anchor ? `#${section.anchor}` : ''; + + return ` +
+ ${icon} ${sectionText} +
+ `; + }).join(''); + + const moreIndicator = hasMore + ? `
+${result.matchingSections.length - 4} more sections
` + : ''; + + return ` +
+ ${sectionsHtml} + ${moreIndicator} +
+ `; + } + + /** + * Render no results state + */ + renderNoResults(query) { + return ` +
+ +

No results found for "${this.utils.escapeHtml(query)}"

+
+ Try: +
    +
  • Checking for typos
  • +
  • Using different or more general terms
  • +
  • Using fewer keywords
  • +
+
+
+ `; + } + + /** + * Bind click events to result items + */ + bindResultEvents(container, results) { + container.querySelectorAll('.search-result-item').forEach((item, index) => { + const result = results[index]; + + // Main item click - go to document + item.addEventListener('click', (e) => { + // Don't trigger if clicking on a section + if (e.target.closest('.search-result-section')) { + return; + } + + const url = item.dataset.url; + window.location.href = url; + }); + + // Section clicks - go to specific section + item.querySelectorAll('.search-result-section').forEach(sectionEl => { + sectionEl.addEventListener('click', (e) => { + e.stopPropagation(); + const anchor = sectionEl.dataset.anchor; + const baseUrl = item.dataset.url; + window.location.href = baseUrl + anchor; + }); + }); + }); + } + + /** + * Get result items from container + */ + getResultItems(container) { + return container.querySelectorAll('.search-result-item'); + } + + /** + * Get selected result item + */ + getSelectedResult(container) { + return container.querySelector('.search-result-item.selected'); + } + + /** + * Select next result item + */ + selectNext(container) { + const results = this.getResultItems(container); + const selected = this.getSelectedResult(container); + + if (results.length === 0) return; + + if (!selected) { + results[0].classList.add('selected'); + return; + } + + const currentIndex = Array.from(results).indexOf(selected); + selected.classList.remove('selected'); + + const nextIndex = (currentIndex + 1) % results.length; + results[nextIndex].classList.add('selected'); + results[nextIndex].scrollIntoView({ block: 'nearest' }); + } + + /** + * Select previous result item + */ + selectPrevious(container) { + const results = this.getResultItems(container); + const selected = this.getSelectedResult(container); + + if (results.length === 0) return; + + if (!selected) { + results[results.length - 1].classList.add('selected'); + return; + } + + const currentIndex = 
Array.from(results).indexOf(selected); + selected.classList.remove('selected'); + + const prevIndex = currentIndex === 0 ? results.length - 1 : currentIndex - 1; + results[prevIndex].classList.add('selected'); + results[prevIndex].scrollIntoView({ block: 'nearest' }); + } + + /** + * Activate selected result + */ + activateSelected(container) { + const selected = this.getSelectedResult(container); + if (selected) { + selected.click(); + } + } + + /** + * Clear all selections + */ + clearSelection(container) { + const results = this.getResultItems(container); + results.forEach(result => result.classList.remove('selected')); + } + + /** + * Render loading state + */ + renderLoading(container) { + if (container) { + container.innerHTML = ` +
+ +

Searching...

+
+ `; + } + } + + /** + * Render error state + */ + renderError(container, message = 'Search error occurred') { + if (container) { + container.innerHTML = ` +
+ +

${this.utils.escapeHtml(message)}

+
+ `; + } + } +} + +// Make ResultRenderer available globally +window.ResultRenderer = ResultRenderer; \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/SearchEngine.js b/docs/_extensions/search_assets/modules/SearchEngine.js new file mode 100644 index 0000000000..d022a8a797 --- /dev/null +++ b/docs/_extensions/search_assets/modules/SearchEngine.js @@ -0,0 +1,817 @@ +/** + * SearchEngine Module + * Handles Lunr.js integration and search logic with filtering and grouping + */ + +class SearchEngine { + constructor(utils) { + this.utils = utils; + this.index = null; + this.documents = {}; + this.isInitialized = false; + // Support both new schema (topics, audience) and legacy (categories, personas) + this.topics = new Set(); + this.tags = new Set(); + this.documentTypes = new Set(); + this.audience = new Set(); + this.difficulties = new Set(); + // Dynamic facets - discovered from documents, not predefined + this.facets = {}; // { facetKey: Set of values } + } + + /** + * Initialize the search engine with documents + */ + async initialize(documents) { + try { + await this.loadLunr(); + this.documents = documents; + this.collectMetadata(); + this.buildIndex(); + this.isInitialized = true; + } catch (error) { + throw error; + } + } + + /** + * Collect metadata for filtering using actual frontmatter values + * Supports both new schema (topics, audience) and legacy (categories, personas) + * Dynamically discovers all facet keys from documents + */ + collectMetadata() { + // Clear existing sets + this.topics = new Set(); + this.tags = new Set(); + this.documentTypes = new Set(); + this.audience = new Set(); + this.difficulties = new Set(); + this.facets = {}; // Reset dynamic facets + + Object.values(this.documents).forEach(doc => { + // Collect topics (new schema) or categories (legacy) + const topicsField = doc.topics || doc.categories; + if (topicsField) { + if (Array.isArray(topicsField)) { + topicsField.forEach(topic => 
this.topics.add(topic)); + } else if (typeof topicsField === 'string') { + topicsField.split(',').forEach(topic => this.topics.add(topic.trim())); + } + } + + // Collect actual frontmatter tags + if (doc.tags) { + if (Array.isArray(doc.tags)) { + doc.tags.forEach(tag => { + // Split space-separated tags and add individually + if (typeof tag === 'string' && tag.includes(' ')) { + tag.split(' ').forEach(individualTag => { + if (individualTag.trim()) { + this.tags.add(individualTag.trim()); + } + }); + } else if (tag && tag.trim()) { + this.tags.add(tag.trim()); + } + }); + } else if (typeof doc.tags === 'string') { + // Handle both comma-separated and space-separated tags + const allTags = doc.tags.includes(',') + ? doc.tags.split(',') + : doc.tags.split(' '); + + allTags.forEach(tag => { + if (tag && tag.trim()) { + this.tags.add(tag.trim()); + } + }); + } + } + + // Use actual content_type from frontmatter (not calculated doc_type) + if (doc.content_type) { + this.documentTypes.add(doc.content_type); + } + + // Collect audience (new schema) or personas (legacy) + const audienceField = doc.audience || doc.personas; + if (audienceField) { + if (Array.isArray(audienceField)) { + audienceField.forEach(aud => this.audience.add(aud)); + } else if (typeof audienceField === 'string') { + this.audience.add(audienceField); + } + } + + if (doc.difficulty) { + this.difficulties.add(doc.difficulty); + } + + // Dynamically discover all facets from documents + if (doc.facets && typeof doc.facets === 'object') { + Object.entries(doc.facets).forEach(([facetKey, facetValue]) => { + // Initialize Set for this facet if not exists + if (!this.facets[facetKey]) { + this.facets[facetKey] = new Set(); + } + // Add value(s) to the facet Set + if (Array.isArray(facetValue)) { + facetValue.forEach(v => this.facets[facetKey].add(v)); + } else if (facetValue) { + this.facets[facetKey].add(facetValue); + } + }); + } + + // Also check for flat facet fields (legacy modality, etc.) 
+ // These get added to facets dynamically + if (doc.modality && !this.facets.modality) { + this.facets.modality = new Set(); + } + if (doc.modality) { + this.facets.modality.add(doc.modality); + } + }); + } + + /** + * Get available filter options using actual frontmatter taxonomy + * Returns both new field names and legacy names for backwards compatibility + * Includes dynamically discovered facets + */ + getFilterOptions() { + // Convert dynamic facets from Sets to sorted arrays + const facetOptions = {}; + Object.entries(this.facets).forEach(([facetKey, facetSet]) => { + facetOptions[facetKey] = Array.from(facetSet).sort(); + }); + + return { + // New schema names + topics: Array.from(this.topics).sort(), + audience: Array.from(this.audience).sort(), + // Legacy names (aliases for backwards compatibility) + categories: Array.from(this.topics).sort(), + personas: Array.from(this.audience).sort(), + // Common fields + tags: Array.from(this.tags).sort(), + documentTypes: Array.from(this.documentTypes).sort(), + difficulties: Array.from(this.difficulties).sort(), + // Dynamic facets (user-defined, discovered from documents) + facets: facetOptions + }; + } + + /** + * Load Lunr.js library if not already loaded + */ + async loadLunr() { + if (typeof lunr === 'undefined') { + await this.utils.loadScript('https://unpkg.com/lunr@2.3.9/lunr.min.js'); + } + } + + /** + * Build the Lunr search index + * Supports both new schema (topics, audience) and legacy (categories, personas) + * + * Field boosting rationale: + * - Title matches are almost always what users want (highest boost) + * - Description (from frontmatter) is hand-crafted summary (high boost) + * - Headings provide structural relevance (medium-high boost) + * - Content gets lowest boost to prevent long documents from dominating + * - Hierarchy: title > description > headings/keywords > tags > content + */ + buildIndex() { + const documentsArray = Object.values(this.documents); + const self = this; + + 
this.index = lunr(function() { + // Define fields with optimized boosting for documentation search patterns + this.ref('id'); + + // Primary fields - highest relevance + this.field('title', { boost: 10 }); // Title matches most important + this.field('description', { boost: 8 }); // Frontmatter description (hand-crafted) + + // Secondary fields - structural relevance + this.field('keywords', { boost: 7 }); // Explicit keywords + this.field('headings_text', { boost: 5 }); // Section headings + this.field('headings', { boost: 5 }); // Section headings (legacy format) + this.field('tags', { boost: 4 }); // Taxonomy tags + + // Tertiary fields - content matching + this.field('summary', { boost: 3 }); // Summary field + this.field('topics', { boost: 2 }); // Topic categorization + this.field('content', { boost: 1 }); // Full content (low to prevent long docs dominating) + + // Metadata fields - filtering support + this.field('content_type', { boost: 1 }); + this.field('audience', { boost: 1 }); + this.field('difficulty', { boost: 1 }); + this.field('modality', { boost: 1 }); + this.field('section_path', { boost: 1 }); + this.field('author', { boost: 1 }); + + // Add documents to index + documentsArray.forEach((doc) => { + try { + this.add({ + id: doc.id, + title: doc.title || '', + description: doc.description || '', // NEW: separate indexed field + content: (doc.content || '').substring(0, 5000), // Limit content length + summary: doc.summary || '', + headings: self.extractHeadingsText(doc.headings), + headings_text: doc.headings_text || '', + keywords: self.arrayToString(doc.keywords), + tags: self.arrayToString(doc.tags), + // Support both topics (new) and categories (legacy) + topics: self.arrayToString(doc.topics || doc.categories), + content_type: doc.content_type || '', + // Support both audience (new) and personas (legacy) + audience: self.arrayToString(doc.audience || doc.personas), + difficulty: doc.difficulty || '', + modality: doc.modality || '', + 
section_path: self.arrayToString(doc.section_path), + author: doc.author || '' + }); + } catch (docError) { + // Skip documents that fail to index + } + }, this); + }); + } + + /** + * Convert array to string for indexing + */ + arrayToString(arr) { + if (Array.isArray(arr)) { + return arr.join(' '); + } + return arr || ''; + } + + /** + * Extract text from headings array + */ + extractHeadingsText(headings) { + if (!Array.isArray(headings)) return ''; + return headings.map(h => h.text || '').join(' '); + } + + /** + * Perform search with query and optional filters + */ + search(query, filters = {}, maxResults = 20) { + if (!this.isInitialized || !this.index) { + return []; + } + + if (!query || query.trim().length < 2) { + return []; + } + + try { + // Enhanced search with multiple strategies + const results = this.performMultiStrategySearch(query); + + // Process and enhance results + const enhancedResults = this.enhanceResults(results, query); + + // Apply filters + const filteredResults = this.applyFilters(enhancedResults, filters); + + // Group and rank results + const groupedResults = this.groupResultsByDocument(filteredResults, query); + + return groupedResults.slice(0, maxResults); + + } catch (error) { + return []; + } + } + + /** + * Apply filters to search results + * Supports both new schema (topic, audience) and legacy (category, persona) filter names + * Handles dynamic facet filters + */ + applyFilters(results, filters) { + return results.filter(result => { + // Topic filter (new) or category filter (legacy) + const topicFilter = filters.topic || filters.category; + if (topicFilter && topicFilter !== '') { + const docTopics = this.getDocumentTopics(result); + if (!docTopics.includes(topicFilter)) { + return false; + } + } + + // Tag filter + if (filters.tag && filters.tag !== '') { + const docTags = this.getDocumentTags(result); + if (!docTags.includes(filters.tag)) { + return false; + } + } + + // Document type filter (using actual frontmatter 
content_type) + if (filters.type && filters.type !== '') { + if (result.content_type !== filters.type) { + return false; + } + } + + // Audience filter (new) or persona filter (legacy) + const audienceFilter = filters.audience || filters.persona; + if (audienceFilter && audienceFilter !== '') { + const docAudience = this.getDocumentAudience(result); + if (!docAudience.includes(audienceFilter)) { + return false; + } + } + + // Difficulty filter + if (filters.difficulty && filters.difficulty !== '') { + if (result.difficulty !== filters.difficulty) { + return false; + } + } + + // Dynamic facet filters (e.g., filters.facets = { modality: 'text-only', framework: 'pytorch' }) + if (filters.facets && typeof filters.facets === 'object') { + for (const [facetKey, facetValue] of Object.entries(filters.facets)) { + if (facetValue && facetValue !== '') { + const docFacetValue = this.getDocumentFacet(result, facetKey); + if (!docFacetValue.includes(facetValue)) { + return false; + } + } + } + } + + // Legacy flat facet filters (e.g., filters.modality directly) + // Check for any filter key that matches a known facet + for (const facetKey of Object.keys(this.facets)) { + if (filters[facetKey] && filters[facetKey] !== '') { + const docFacetValue = this.getDocumentFacet(result, facetKey); + if (!docFacetValue.includes(filters[facetKey])) { + return false; + } + } + } + + return true; + }); + } + + /** + * Get a specific facet value for a document + */ + getDocumentFacet(doc, facetKey) { + // Check nested facets object first + if (doc.facets && doc.facets[facetKey]) { + const value = doc.facets[facetKey]; + return Array.isArray(value) ? value : [value]; + } + // Check flat field (legacy) + if (doc[facetKey]) { + const value = doc[facetKey]; + return Array.isArray(value) ? 
value : [value]; + } + return []; + } + + /** + * Get topics for a document (supports new schema and legacy categories) + */ + getDocumentTopics(doc) { + const topics = []; + + // From explicit topics (new schema) or categories (legacy) + const topicsField = doc.topics || doc.categories; + if (topicsField) { + if (Array.isArray(topicsField)) { + topics.push(...topicsField); + } else { + topics.push(...topicsField.split(',').map(t => t.trim())); + } + } + + // From section path + if (doc.section_path && Array.isArray(doc.section_path)) { + topics.push(...doc.section_path); + } + + // From document ID path + if (doc.id) { + const pathParts = doc.id.split('/').filter(part => part && part !== 'index'); + topics.push(...pathParts); + } + + return [...new Set(topics)]; // Remove duplicates + } + + /** + * Get categories for a document (legacy alias for getDocumentTopics) + */ + getDocumentCategories(doc) { + return this.getDocumentTopics(doc); + } + + /** + * Get tags for a document + */ + getDocumentTags(doc) { + if (!doc.tags) return []; + + if (Array.isArray(doc.tags)) { + // Handle array of tags that might contain space-separated strings + const flatTags = []; + doc.tags.forEach(tag => { + if (typeof tag === 'string' && tag.includes(' ')) { + // Split space-separated tags + tag.split(' ').forEach(individualTag => { + if (individualTag.trim()) { + flatTags.push(individualTag.trim()); + } + }); + } else if (tag && tag.trim()) { + flatTags.push(tag.trim()); + } + }); + return flatTags; + } + + // Handle string tags - check for both comma and space separation + if (typeof doc.tags === 'string') { + const allTags = []; + const tagString = doc.tags.trim(); + + if (tagString.includes(',')) { + // Comma-separated tags + tagString.split(',').forEach(tag => { + if (tag.trim()) { + allTags.push(tag.trim()); + } + }); + } else { + // Space-separated tags + tagString.split(' ').forEach(tag => { + if (tag.trim()) { + allTags.push(tag.trim()); + } + }); + } + + return allTags; + } 
+ + return []; + } + + + /** + * Get audience for a document (supports new schema and legacy personas) + */ + getDocumentAudience(doc) { + // Support both audience (new) and personas (legacy) + const audienceField = doc.audience || doc.personas; + if (!audienceField) return []; + + if (Array.isArray(audienceField)) { + return audienceField; + } + + return [audienceField]; + } + + /** + * Get personas for a document (legacy alias for getDocumentAudience) + */ + getDocumentPersonas(doc) { + return this.getDocumentAudience(doc); + } + + /** + * Perform search with multiple strategies + */ + performMultiStrategySearch(query) { + const strategies = [ + // Exact phrase search with wildcards + `"${query}" ${query}*`, + // Fuzzy search with wildcards + `${query}* ${query}~2`, + // Individual terms with boost + query.split(/\s+/).map(term => `${term}*`).join(' '), + // Fallback: just the query + query + ]; + + let allResults = []; + const seenIds = new Set(); + + for (const strategy of strategies) { + try { + const results = this.index.search(strategy); + + // Add new results (avoid duplicates) + results.forEach(result => { + if (!seenIds.has(result.ref)) { + seenIds.add(result.ref); + allResults.push({ + ...result, + strategy: strategy + }); + } + }); + + // If we have enough good results, stop + if (allResults.length >= 30) break; + + } catch (strategyError) { + console.warn(`Search strategy failed: ${strategy}`, strategyError); + } + } + + return allResults; + } + + /** + * Enhance search results with document data and apply re-ranking + */ + enhanceResults(results, query) { + const queryLower = query.toLowerCase().trim(); + const queryTerms = queryLower.split(/\s+/); + + return results.map(result => { + const doc = this.documents[result.ref]; + if (!doc) { + console.warn(`Document not found: ${result.ref}`); + return null; + } + + // Calculate additional relevance boost for title matches + const titleBoost = this.calculateTitleBoost(doc, queryLower, queryTerms); + const 
keywordBoost = this.calculateKeywordBoost(doc, queryTerms); + const descriptionBoost = this.calculateDescriptionBoost(doc, queryTerms); + + // Apply boosts to base score + const enhancedScore = result.score * (1 + titleBoost + keywordBoost + descriptionBoost); + + return { + ...doc, + score: enhancedScore, + baseScore: result.score, + titleBoost, + keywordBoost, + descriptionBoost, + matchedTerms: Object.keys(result.matchData?.metadata || {}), + matchData: result.matchData, + strategy: result.strategy + }; + }).filter(Boolean); // Remove null results + } + + /** + * Calculate boost for title matches + * Heavily rewards exact and partial title matches + */ + calculateTitleBoost(doc, queryLower, queryTerms) { + if (!doc.title) return 0; + + const titleLower = doc.title.toLowerCase(); + let boost = 0; + + // Exact title match (highest boost) + if (titleLower === queryLower) { + boost += 10; + } + // Title starts with query + else if (titleLower.startsWith(queryLower)) { + boost += 8; + } + // Query is a significant part of title (e.g., "audit" in "Documentation Audit Guide") + else if (titleLower.includes(queryLower)) { + // Boost more if query is a larger portion of the title + const ratio = queryLower.length / titleLower.length; + boost += 5 * ratio + 3; + } + // All query terms appear in title + else if (queryTerms.every(term => titleLower.includes(term))) { + boost += 4; + } + // Some query terms appear in title + else { + const matchingTerms = queryTerms.filter(term => titleLower.includes(term)); + if (matchingTerms.length > 0) { + boost += 2 * (matchingTerms.length / queryTerms.length); + } + } + + // Additional boost if title contains query as a distinct word + const titleWords = titleLower.split(/[\s\-_:]+/); + if (titleWords.some(word => word === queryLower || word.startsWith(queryLower))) { + boost += 2; + } + + return boost; + } + + /** + * Calculate boost for keyword matches + */ + calculateKeywordBoost(doc, queryTerms) { + if (!doc.keywords) return 0; + + 
const keywords = Array.isArray(doc.keywords) + ? doc.keywords.map(k => k.toLowerCase()) + : doc.keywords.toLowerCase().split(/[\s,]+/); + + let boost = 0; + + queryTerms.forEach(term => { + if (keywords.some(kw => kw === term || kw.startsWith(term))) { + boost += 1.5; + } + }); + + return boost; + } + + /** + * Calculate boost for description matches + */ + calculateDescriptionBoost(doc, queryTerms) { + if (!doc.description) return 0; + + const descLower = doc.description.toLowerCase(); + let boost = 0; + + // Check if query terms appear early in description + queryTerms.forEach(term => { + const pos = descLower.indexOf(term); + if (pos !== -1) { + // Boost more if term appears early + boost += pos < 50 ? 1 : 0.5; + } + }); + + return boost; + } + + /** + * Group results by document and find matching sections + */ + groupResultsByDocument(results, query) { + const grouped = new Map(); + + results.forEach(result => { + const docId = result.id; + + if (!grouped.has(docId)) { + // Find matching sections within this document + const matchingSections = this.findMatchingSections(result, query); + + grouped.set(docId, { + ...result, + matchingSections, + totalMatches: 1, + combinedScore: result.score + }); + } else { + // Document already exists, combine scores and sections + const existing = grouped.get(docId); + const additionalSections = this.findMatchingSections(result, query); + + existing.matchingSections = this.mergeSections(existing.matchingSections, additionalSections); + existing.totalMatches += 1; + existing.combinedScore = Math.max(existing.combinedScore, result.score); + } + }); + + // Convert map to array and sort by combined score + return Array.from(grouped.values()) + .sort((a, b) => b.combinedScore - a.combinedScore); + } + + /** + * Find matching sections within a document + */ + findMatchingSections(result, query) { + const matchingSections = []; + const queryTerms = query.toLowerCase().split(/\s+/); + + // Check if title matches + if (result.title) { 
+ const titleText = result.title.toLowerCase(); + const hasMatch = queryTerms.some(term => titleText.includes(term)); + + if (hasMatch) { + matchingSections.push({ + type: 'title', + text: result.title, + level: 1, + anchor: '' + }); + } + } + + // Check headings for matches + if (result.headings && Array.isArray(result.headings)) { + result.headings.forEach(heading => { + const headingText = heading.text?.toLowerCase() || ''; + const hasMatch = queryTerms.some(term => headingText.includes(term)); + + if (hasMatch) { + matchingSections.push({ + type: 'heading', + text: heading.text, + level: heading.level || 2, + anchor: this.generateAnchor(heading.text) + }); + } + }); + } + + // If no specific sections found, add a general content match + if (matchingSections.length === 0) { + matchingSections.push({ + type: 'content', + text: 'Content match', + level: 0, + anchor: '' + }); + } + + return matchingSections; + } + + /** + * Generate anchor link similar to how Sphinx does it + */ + generateAnchor(headingText) { + if (!headingText) return ''; + + return headingText + .toLowerCase() + .replace(/[^\w\s-]/g, '') // Remove special chars + .replace(/\s+/g, '-') // Replace spaces with hyphens + .trim(); + } + + /** + * Merge sections, avoiding duplicates + */ + mergeSections(existing, additional) { + const merged = [...existing]; + + additional.forEach(section => { + const isDuplicate = existing.some(existingSection => + existingSection.text === section.text && + existingSection.type === section.type + ); + + if (!isDuplicate) { + merged.push(section); + } + }); + + return merged; + } + + /** + * Get search statistics + */ + getStatistics() { + // Count facet keys and total values + const facetStats = {}; + Object.entries(this.facets).forEach(([key, valueSet]) => { + facetStats[key] = valueSet.size; + }); + + return { + documentsIndexed: Object.keys(this.documents).length, + topicsAvailable: this.topics.size, + tagsAvailable: this.tags.size, + documentTypesAvailable: 
this.documentTypes.size, + audienceAvailable: this.audience.size, + difficultiesAvailable: this.difficulties.size, + facetsDiscovered: Object.keys(this.facets).length, + facetStats: facetStats, + isInitialized: this.isInitialized + }; + } + + /** + * Check if the search engine is ready + */ + isReady() { + return this.isInitialized && this.index !== null; + } +} + +// Make SearchEngine available globally +window.SearchEngine = SearchEngine; \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/SearchInterface.js b/docs/_extensions/search_assets/modules/SearchInterface.js new file mode 100644 index 0000000000..569280b818 --- /dev/null +++ b/docs/_extensions/search_assets/modules/SearchInterface.js @@ -0,0 +1,615 @@ +/** + * SearchInterface Module + * Handles the creation and management of the search UI + */ + +class SearchInterface { + constructor(options) { + this.options = options; + this.isVisible = false; + this.modal = null; + this.input = null; + this.resultsContainer = null; + this.statsContainer = null; + } + + /** + * Create the search interface elements + */ + create() { + // Check if we're on the search page + if (this.isSearchPage()) { + this.enhanceSearchPage(); + } else { + // On other pages, create the modal for search functionality + this.createModal(); + this.enhanceSearchButton(); + } + console.log('✅ Search interface created'); + } + + /** + * Check if we're on the search page + */ + isSearchPage() { + return window.location.pathname.includes('/search') || + window.location.pathname.includes('/search.html') || + window.location.pathname.endsWith('search/') || + document.querySelector('#search-results') !== null || + document.querySelector('.search-page') !== null || + document.querySelector('form[action*="search"]') !== null || + document.title.toLowerCase().includes('search') || + document.querySelector('h1')?.textContent.toLowerCase().includes('search'); + } + + /** + * Enhance the existing search page using the 
template structure + */ + enhanceSearchPage() { + console.log('🔍 Enhancing search page using existing template...'); + console.log('📄 Page URL:', window.location.href); + console.log('📋 Page title:', document.title); + + // Use the template's existing elements + this.input = document.querySelector('#enhanced-search-page-input'); + this.resultsContainer = document.querySelector('#enhanced-search-page-results'); + + console.log('🔎 Template search input found:', !!this.input); + console.log('📦 Template results container found:', !!this.resultsContainer); + + if (this.input && this.resultsContainer) { + console.log('✅ Using existing template structure - no additional setup needed'); + // The template's JavaScript will handle everything + return; + } + + // Fallback for non-template pages + console.log('⚠️ Template elements not found, falling back to generic search page detection'); + this.fallbackToGenericSearchPage(); + } + + /** + * Fallback for pages that don't use the template + */ + fallbackToGenericSearchPage() { + // Find existing search elements on generic pages + this.input = document.querySelector('#searchbox input[type="text"]') || + document.querySelector('input[name="q"]') || + document.querySelector('.search input[type="text"]'); + + // Find or create results container + this.resultsContainer = document.querySelector('#search-results') || + document.querySelector('.search-results') || + this.createResultsContainer(); + + // Create stats container + this.statsContainer = this.createStatsContainer(); + + // Hide default Sphinx search results if they exist + this.hideDefaultResults(); + + // Initialize with empty state + this.showEmptyState(); + + console.log('✅ Generic search page enhanced'); + } + + /** + * Create results container if it doesn't exist + */ + createResultsContainer() { + const container = document.createElement('div'); + container.id = 'enhanced-search-results'; + container.className = 'enhanced-search-results'; + + // Add basic styling to 
ensure proper positioning + container.style.cssText = ` + width: 100%; + max-width: none; + margin: 1rem 0; + clear: both; + position: relative; + z-index: 1; + `; + + // Find the best place to insert it within the main content area + const insertLocation = this.findBestInsertLocation(); + + if (insertLocation.parent && insertLocation.method === 'append') { + insertLocation.parent.appendChild(container); + console.log(`✅ Results container added to: ${insertLocation.parent.className || insertLocation.parent.tagName}`); + } else if (insertLocation.parent && insertLocation.method === 'after') { + insertLocation.parent.insertAdjacentElement('afterend', container); + console.log(`✅ Results container added after: ${insertLocation.parent.className || insertLocation.parent.tagName}`); + } else { + // Last resort - create a wrapper in main content + this.createInMainContent(container); + } + + return container; + } + + /** + * Find the best location to insert search results + */ + findBestInsertLocation() { + // Try to find existing search-related elements first + let searchResults = document.querySelector('.search-results, #search-results'); + if (searchResults) { + return { parent: searchResults, method: 'append' }; + } + + // Look for search form and place results after it + let searchForm = document.querySelector('#searchbox, .search form, form[action*="search"]'); + if (searchForm) { + return { parent: searchForm, method: 'after' }; + } + + // Look for main content containers (common Sphinx/theme classes) + const mainSelectors = [ + '.document .body', + '.document .documentwrapper', + '.content', + '.main-content', + '.page-content', + 'main', + '.container .row .col', + '.rst-content', + '.body-content' + ]; + + for (const selector of mainSelectors) { + const element = document.querySelector(selector); + if (element) { + return { parent: element, method: 'append' }; + } + } + + // Try to find any container that's not the body + const anyContainer = 
document.querySelector('.container, .wrapper, .page, #content'); + if (anyContainer) { + return { parent: anyContainer, method: 'append' }; + } + + return { parent: null, method: null }; + } + + /** + * Create container in main content as last resort + */ + createInMainContent(container) { + // Create a wrapper section + const wrapper = document.createElement('section'); + wrapper.className = 'search-page-content'; + wrapper.style.cssText = ` + max-width: 800px; + margin: 2rem auto; + padding: 0 1rem; + `; + + // Add a title + const title = document.createElement('h1'); + title.textContent = 'Search Results'; + title.style.cssText = 'margin-bottom: 1rem;'; + wrapper.appendChild(title); + + // Add the container + wrapper.appendChild(container); + + // Insert into body, but with proper styling + document.body.appendChild(wrapper); + + console.log('⚠️ Created search results in body with wrapper - consider improving page structure'); + } + + /** + * Create stats container + */ + createStatsContainer() { + const container = document.createElement('div'); + container.className = 'enhanced-search-stats'; + container.style.cssText = 'margin: 1rem 0; font-size: 0.9rem; color: #666;'; + + // Insert before results + if (this.resultsContainer && this.resultsContainer.parentNode) { + this.resultsContainer.parentNode.insertBefore(container, this.resultsContainer); + } + + return container; + } + + /** + * Hide default Sphinx search results + */ + hideDefaultResults() { + // Hide default search results that Sphinx might show + const defaultResults = document.querySelectorAll( + '.search-summary, .search li, #search-results .search, .searchresults' + ); + defaultResults.forEach(el => { + el.style.display = 'none'; + }); + } + + /** + * Create the main search modal (legacy - kept for compatibility) + */ + createModal() { + // Enhanced search modal + const modal = document.createElement('div'); + modal.id = 'enhanced-search-modal'; + modal.className = 'enhanced-search-modal'; + 
modal.innerHTML = ` +
+
+
+
+ + + +
+
+
+
+ +
`;

    document.body.appendChild(modal);

    // Cache references for later lookups.
    this.modal = modal;
    this.input = modal.querySelector('#enhanced-search-input');
    this.resultsContainer = modal.querySelector('.enhanced-search-results');
    this.statsContainer = modal.querySelector('.enhanced-search-stats');

    // Wire up both ways of dismissing the modal.
    const closeButton = modal.querySelector('.enhanced-search-close');
    const backdrop = modal.querySelector('.enhanced-search-backdrop');

    if (closeButton) {
      closeButton.addEventListener('click', () => this.hideModal());
    }
    if (backdrop) {
      backdrop.addEventListener('click', () => this.hideModal());
    }

    // Hidden until explicitly shown.
    modal.style.display = 'none';

    this.showEmptyState();
  }

  /**
   * Intercept the theme's existing search affordances (form submit,
   * search button click, input focus) and open the modal instead.
   */
  enhanceSearchButton() {
    const searchForm = document.querySelector('#searchbox form') ||
                       document.querySelector('.search form') ||
                       document.querySelector('form[action*="search"]');
    if (searchForm) {
      searchForm.addEventListener('submit', (e) => {
        e.preventDefault();
        this.showModal();
      });
      console.log('✅ Search form enhanced to show modal');
    }

    const existingButton = document.querySelector('.search-button-field, .search-button__button');
    if (existingButton) {
      existingButton.addEventListener('click', (e) => {
        e.preventDefault();
        this.showModal();
      });
      console.log('✅ Search button enhanced to show modal');
    }

    const searchInput = document.querySelector('#searchbox input[type="text"]') ||
                        document.querySelector('.search input[type="text"]');
    if (searchInput) {
      searchInput.addEventListener('focus', () => {
        this.showModal();
      });
      console.log('✅ Search input enhanced to show modal on focus');
    }
  }

  /**
   * Show the search UI: the modal when one exists, otherwise focus and
   * select the inline input.
   */
  show() {
    if (this.modal) {
      this.showModal();
    } else if (this.input) {
      this.input.focus();
      this.input.select();
    }
  }

  /**
   * Hide the search UI (modal or inline input).
   */
  hide() {
    if (this.modal) {
      this.hideModal();
    } else if (this.input) {
      this.input.blur();
    }
  }

  /**
   * Display the modal and focus its input once it has rendered.
   */
  showModal() {
    if (!this.modal) return;

    this.modal.style.display = 'flex';
    this.modal.classList.add('visible');
    this.isVisible = true;
    // Focus after a brief delay so the modal is actually visible.
    setTimeout(() => {
      if (this.input) {
        this.input.focus();
        this.input.select();
      }
    }, 100);
    console.log('🔍 Search modal shown');
  }

  /**
   * Hide the modal, removing it from layout after the fade-out.
   */
  hideModal() {
    if (!this.modal) return;

    this.modal.classList.remove('visible');
    this.isVisible = false;
    // Remove from layout once the 200ms animation completes.
    setTimeout(() => {
      if (this.modal) {
        this.modal.style.display = 'none';
      }
    }, 200);
    this.showEmptyState();
    console.log('🔍 Search modal hidden');
  }

  /** Search input element, if any. */
  getInput() {
    return this.input;
  }

  /** Results container element, if any. */
  getResultsContainer() {
    return this.resultsContainer;
  }

  /** Stats container element, if any. */
  getStatsContainer() {
    return this.statsContainer;
  }

  /** Modal root element, if any. */
  getModal() {
    return this.modal;
  }

  /** True while the modal is flagged visible and actually displayed. */
  isModalVisible() {
    return this.isVisible && this.modal && this.modal.style.display !== 'none';
  }

  /**
   * Render the initial "start typing" placeholder into the results area.
   */
  showEmptyState() {
    if (this.resultsContainer) {
      this.resultsContainer.innerHTML = `
+ +

Start typing to search documentation...

+
+ Search tips: +
    +
  • Use specific terms for better results
  • +
  • Try different keywords if you don't find what you're looking for
  • +
  • Search includes titles, content, headings, and tags
  • +
+
+
+ `; + } + } + + /** + * Show no results state + */ + showNoResults(query) { + if (this.resultsContainer) { + this.resultsContainer.innerHTML = ` +
+ +

No results found for "${this.escapeHtml(query)}"

+
+ Try: +
    +
  • Checking for typos
  • +
  • Using different or more general terms
  • +
  • Using fewer keywords
  • +
+
+
+ `; + } + } + + /** + * Show error state + */ + showError(message = 'Search temporarily unavailable') { + if (this.resultsContainer) { + this.resultsContainer.innerHTML = ` +
+ +

${this.escapeHtml(message)}

+
+ `; + } + } + + /** + * Update search statistics + */ + updateStats(query, count) { + if (this.statsContainer) { + if (count > 0) { + this.statsContainer.innerHTML = `${count} result${count !== 1 ? 's' : ''} for "${this.escapeHtml(query)}"`; + } else { + this.statsContainer.innerHTML = `No results for "${this.escapeHtml(query)}"`; + } + } + } + + /** + * Clear search statistics + */ + clearStats() { + if (this.statsContainer) { + this.statsContainer.innerHTML = ''; + } + } + + /** + * Get current search query + */ + getQuery() { + return this.input ? this.input.value.trim() : ''; + } + + /** + * Set search query + */ + setQuery(query) { + if (this.input) { + this.input.value = query; + } + } + + /** + * Clear search query + */ + clearQuery() { + if (this.input) { + this.input.value = ''; + } + } + + /** + * Focus the search input + */ + focusInput() { + if (this.input) { + this.input.focus(); + } + } + + /** + * Get close button for event binding + */ + getCloseButton() { + return this.modal ? this.modal.querySelector('.enhanced-search-close') : null; + } + + /** + * Get backdrop for event binding + */ + getBackdrop() { + return this.modal ? this.modal.querySelector('.enhanced-search-backdrop') : null; + } + + /** + * Escape HTML to prevent XSS + */ + escapeHtml(unsafe) { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + } + + /** + * Add CSS class to modal + */ + addModalClass(className) { + if (this.modal) { + this.modal.classList.add(className); + } + } + + /** + * Remove CSS class from modal + */ + removeModalClass(className) { + if (this.modal) { + this.modal.classList.remove(className); + } + } + + /** + * Check if modal has class + */ + hasModalClass(className) { + return this.modal ? 
this.modal.classList.contains(className) : false; + } + + /** + * Destroy the search interface + */ + destroy() { + if (this.modal) { + this.modal.remove(); + this.modal = null; + this.input = null; + this.resultsContainer = null; + this.statsContainer = null; + } + this.isVisible = false; + } +} + +// Make SearchInterface available globally +window.SearchInterface = SearchInterface; \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/SearchPageManager.js b/docs/_extensions/search_assets/modules/SearchPageManager.js new file mode 100644 index 0000000000..6122fcc2af --- /dev/null +++ b/docs/_extensions/search_assets/modules/SearchPageManager.js @@ -0,0 +1,1204 @@ +/** + * Search Page Manager Module + * Handles search functionality on the dedicated search page with filtering and grouping + */ + +class SearchPageManager { + constructor() { + this.searchInput = null; + this.resultsContainer = null; + this.searchEngine = null; + this.documents = []; + this.currentQuery = ''; + this.allResults = []; + this.currentFilters = { + topic: '', + category: '', // Legacy alias + tag: '', + type: '', + facets: {} // Dynamic facets + }; + this.filterOptions = { + topics: [], + categories: [], // Legacy alias + tags: [], + documentTypes: [], + audience: [], + personas: [], // Legacy alias + difficulties: [], + facets: {} // Dynamic facets + }; + + this.init(); + } + + async init() { + console.log('🔍 Initializing search page...'); + + // Get page elements + this.searchInput = document.querySelector('#enhanced-search-page-input'); + this.resultsContainer = document.querySelector('#enhanced-search-page-results'); + + if (!this.searchInput || !this.resultsContainer) { + console.error('❌ Required search page elements not found'); + return; + } + + // Wait for enhanced search to be available + await this.waitForEnhancedSearch(); + + // Create filter interface + this.createFilterInterface(); + + // Set up event listeners + this.setupEventListeners(); + + // 
Handle URL search parameter + this.handleUrlSearch(); + + console.log('✅ Search page initialized'); + } + + async waitForEnhancedSearch() { + return new Promise((resolve) => { + const checkForSearch = () => { + if (window.enhancedSearchInstance && window.enhancedSearchInstance.isLoaded) { + this.searchEngine = window.enhancedSearchInstance.getSearchEngine(); + this.documents = window.enhancedSearchInstance.getDocuments(); + + // Get filter options + if (this.searchEngine && this.searchEngine.getFilterOptions) { + this.filterOptions = this.searchEngine.getFilterOptions(); + console.log('✅ Filter options loaded:', this.filterOptions); + } + + resolve(); + } else { + setTimeout(checkForSearch, 100); + } + }; + checkForSearch(); + }); + } + + createFilterInterface() { + // Get the search controls container + const searchControlsContainer = this.searchInput.parentNode; + + // Add unified styling to the container + searchControlsContainer.className = 'search-controls-container mb-4'; + + // Create filter section + const filterSection = document.createElement('div'); + filterSection.className = 'search-filters'; + filterSection.innerHTML = this.renderFilterInterface(); + + // Wrap the search input in a styled container + const searchInputWrapper = document.createElement('div'); + searchInputWrapper.className = 'search-input-wrapper'; + searchInputWrapper.innerHTML = ` + + `; + this.searchInput.parentNode.insertBefore(searchInputWrapper, this.searchInput); + searchInputWrapper.appendChild(this.searchInput); + + // Insert filters before the search input wrapper within the same container + searchControlsContainer.insertBefore(filterSection, searchInputWrapper); + + // Add search input wrapper class for consistent styling + this.searchInput.className = 'search-input-field'; + this.searchInput.placeholder = 'Search documentation...'; + + // Bind filter events + this.bindFilterEvents(); + } + + renderFilterInterface() { + // Use topics (new) or categories (legacy) with null 
safety + const topics = this.filterOptions.topics || this.filterOptions.categories || []; + const topicOptions = topics.map(topic => + `` + ).join(''); + + const tags = this.filterOptions.tags || []; + const tagOptions = tags.map(tag => + `` + ).join(''); + + const types = this.filterOptions.documentTypes || []; + const typeOptions = types.map(type => + `` + ).join(''); + + // Use audience (new) or personas (legacy) with null safety + const audience = this.filterOptions.audience || this.filterOptions.personas || []; + const audienceOptions = audience.map(aud => + `` + ).join(''); + + const difficulties = this.filterOptions.difficulties || []; + const difficultyOptions = difficulties.map(difficulty => + `` + ).join(''); + + // Dynamic facets - render additional filter dropdowns for each facet + const facetFilters = this.renderDynamicFacetFilters(); + + // Count active filters + const activeCount = this.getActiveFilterCount(); + + return ` +
+
+ + Filters + ${activeCount > 0 ? `${activeCount}` : ''} +
+ +
+
+
+ +
+ + +
+
+ +
+ +
+ + +
+
+ +
+ +
+ + +
+
+ + ${facetFilters} +
`;
  }

  /**
   * Count the filters that currently carry a value: topic/category
   * counts once, plus tag, type, and each active dynamic facet.
   */
  getActiveFilterCount() {
    let count = 0;
    if (this.currentFilters.topic || this.currentFilters.category) count++;
    if (this.currentFilters.tag) count++;
    if (this.currentFilters.type) count++;
    if (this.currentFilters.facets) {
      for (const value of Object.values(this.currentFilters.facets)) {
        if (value) count++;
      }
    }
    return count;
  }

  /**
   * Sync the filter chrome (count badge, clear button, select highlight)
   * with the current filter state.
   */
  updateFilterUI() {
    const activeCount = this.getActiveFilterCount();
    const countBadge = document.querySelector('.active-filter-count');
    const clearBtn = document.getElementById('clear-filters');

    if (countBadge) {
      if (activeCount > 0) {
        countBadge.textContent = activeCount;
        countBadge.style.display = 'inline-flex';
      } else {
        countBadge.style.display = 'none';
      }
    }

    if (clearBtn) {
      clearBtn.classList.toggle('hidden', activeCount === 0);
    }

    // Highlight wrappers of selects that currently carry a value.
    document.querySelectorAll('.filter-select').forEach(select => {
      const wrapper = select.closest('.filter-select-wrapper');
      if (wrapper) {
        wrapper.classList.toggle('has-value', select.value !== '');
      }
    });
  }

  /**
   * Render one extra dropdown per discovered dynamic facet.
   */
  renderDynamicFacetFilters() {
    const facets = this.filterOptions.facets || {};

    return Object.entries(facets).map(([facetKey, facetValues]) => {
      if (!Array.isArray(facetValues) || facetValues.length === 0) return '';

      const options = facetValues.map(value =>
        `
+ +
+ + +
+
+ `; + }).join(''); + } + + getFacetIcon(facetKey) { + const iconMap = { + 'modality': 'fa-solid fa-layer-group', + 'framework': 'fa-solid fa-cube', + 'platform': 'fa-solid fa-desktop', + 'language': 'fa-solid fa-code', + 'version': 'fa-solid fa-code-branch', + 'status': 'fa-solid fa-circle-check' + }; + return iconMap[facetKey.toLowerCase()] || 'fa-solid fa-filter'; + } + + formatFacetName(facetKey) { + return facetKey + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + formatFacetValue(value) { + if (typeof value !== 'string') return String(value); + return value + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + formatCategoryName(category) { + return category + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + formatTypeName(type) { + return type + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + formatPersonaName(persona) { + // Convert "data-scientist-focused" to "Data Scientist Focused" + return persona + .replace(/-focused$/, '') // Remove "-focused" suffix + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + formatDifficultyName(difficulty) { + return difficulty.charAt(0).toUpperCase() + difficulty.slice(1); + } + + formatModalityName(modality) { + return modality + .split(/[-_]/) + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + bindFilterEvents() { + // Topic filter (new schema, replaces category) + const topicFilter = document.getElementById('topic-filter'); + if (topicFilter) { + topicFilter.addEventListener('change', (e) => { + this.currentFilters.topic = e.target.value; + this.currentFilters.category = e.target.value; // Legacy alias + this.updateFilterUI(); + this.applyFiltersAndSearch(); + }); + } + + // Tag filter + const tagFilter = document.getElementById('tag-filter'); + if 
(tagFilter) { + tagFilter.addEventListener('change', (e) => { + this.currentFilters.tag = e.target.value; + this.updateFilterUI(); + this.applyFiltersAndSearch(); + }); + } + + // Type filter + const typeFilter = document.getElementById('type-filter'); + if (typeFilter) { + typeFilter.addEventListener('change', (e) => { + this.currentFilters.type = e.target.value; + this.updateFilterUI(); + this.applyFiltersAndSearch(); + }); + } + + // Dynamic facet filters + document.querySelectorAll('.facet-filter').forEach(select => { + select.addEventListener('change', (e) => { + const facetKey = e.target.dataset.facetKey; + if (!this.currentFilters.facets) { + this.currentFilters.facets = {}; + } + this.currentFilters.facets[facetKey] = e.target.value; + // Also set flat key for backwards compatibility + this.currentFilters[facetKey] = e.target.value; + this.updateFilterUI(); + this.applyFiltersAndSearch(); + }); + }); + + // Clear filters + const clearBtn = document.getElementById('clear-filters'); + if (clearBtn) { + clearBtn.addEventListener('click', () => { + this.clearFilters(); + }); + } + } + + clearFilters() { + this.currentFilters = { + topic: '', + category: '', // Legacy alias + tag: '', + type: '', + facets: {} + }; + + // Reset filter selects with null safety + const topicFilter = document.getElementById('topic-filter'); + if (topicFilter) topicFilter.value = ''; + + const tagFilter = document.getElementById('tag-filter'); + if (tagFilter) tagFilter.value = ''; + + const typeFilter = document.getElementById('type-filter'); + if (typeFilter) typeFilter.value = ''; + + // Reset dynamic facet filters + document.querySelectorAll('.facet-filter').forEach(select => { + select.value = ''; + }); + + // Update filter UI state + this.updateFilterUI(); + + // Clear active filter display + this.updateActiveFiltersDisplay(); + + // Re-run search + this.applyFiltersAndSearch(); + } + + handleBadgeClick(filterType, filterValue) { + // Handle dynamic facet filters 
(facet-modality, facet-framework, etc.) + if (filterType.startsWith('facet-')) { + const facetKey = filterType.replace('facet-', ''); + if (!this.currentFilters.facets) { + this.currentFilters.facets = {}; + } + this.currentFilters.facets[facetKey] = filterValue; + this.currentFilters[facetKey] = filterValue; // Flat alias + + // Update dropdown if it exists + const dropdown = document.getElementById(`facet-${facetKey}-filter`); + if (dropdown) { + dropdown.value = filterValue; + } + } else { + // Standard filter + this.currentFilters[filterType] = filterValue; + + // Handle legacy aliases and update corresponding dropdowns + if (filterType === 'topic') { + this.currentFilters.category = filterValue; + const topicDropdown = document.getElementById('topic-filter'); + if (topicDropdown) topicDropdown.value = filterValue; + } else if (filterType === 'audience') { + this.currentFilters.persona = filterValue; + const audienceDropdown = document.getElementById('audience-filter'); + if (audienceDropdown) audienceDropdown.value = filterValue; + } else if (filterType === 'difficulty') { + const difficultyDropdown = document.getElementById('difficulty-filter'); + if (difficultyDropdown) difficultyDropdown.value = filterValue; + } else if (filterType === 'tag') { + const tagDropdown = document.getElementById('tag-filter'); + if (tagDropdown) tagDropdown.value = filterValue; + } else if (filterType === 'type') { + const typeDropdown = document.getElementById('type-filter'); + if (typeDropdown) typeDropdown.value = filterValue; + } else { + // Fallback: try to update dropdown by filter type + const dropdown = document.getElementById(`${filterType}-filter`); + if (dropdown) { + dropdown.value = filterValue; + } + } + } + + // Update active filters display + this.updateActiveFiltersDisplay(); + + // Re-run search + this.applyFiltersAndSearch(); + } + + updateActiveFiltersDisplay() { + // Remove existing active filters display + const existingDisplay = 
document.querySelector('.active-filters-display'); + if (existingDisplay) { + existingDisplay.remove(); + } + + // Check for active dynamic facet filters (not in standard dropdowns) + const activeMetadataFilters = []; + + // Dynamic facet filters + if (this.currentFilters.facets) { + Object.entries(this.currentFilters.facets).forEach(([facetKey, facetValue]) => { + if (facetValue) { + activeMetadataFilters.push(`🏷️ ${this.formatFacetName(facetKey)}: ${this.formatFacetValue(facetValue)}`); + } + }); + } + + if (activeMetadataFilters.length > 0) { + const filtersContainer = document.querySelector('.search-filters'); + if (filtersContainer) { + const activeFiltersHtml = ` +
+ Active filters: + ${activeMetadataFilters.map(filter => `${filter}`).join(' ')} + +
+ `; + filtersContainer.insertAdjacentHTML('afterend', activeFiltersHtml); + } + } + } + + clearMetadataFilters() { + this.currentFilters.facets = {}; + + // Reset dynamic facet filters in UI + document.querySelectorAll('.facet-filter').forEach(select => { + select.value = ''; + }); + + this.updateActiveFiltersDisplay(); + this.applyFiltersAndSearch(); + } + + applyFiltersAndSearch() { + if (this.currentQuery) { + this.handleSearch(this.currentQuery); + } + } + + setupEventListeners() { + // Search input + this.searchInput.addEventListener('input', this.debounce((e) => { + this.handleSearch(e.target.value); + }, 300)); + + this.searchInput.addEventListener('keydown', (e) => { + if (e.key === 'Enter') { + e.preventDefault(); + this.handleSearch(e.target.value); + } + }); + + // Badge click handlers (using event delegation) + this.resultsContainer.addEventListener('click', (e) => { + if (e.target.classList.contains('clickable-badge')) { + const filterType = e.target.dataset.filterType; + const filterValue = e.target.dataset.filterValue; + this.handleBadgeClick(filterType, filterValue); + } + }); + + // Make instance available globally for button callbacks + window.searchPageManager = this; + + // Initialize keyboard navigation state + this.focusedIndex = -1; + + // Focus input on page load + this.searchInput.focus(); + } + + /** + * Setup keyboard navigation for search results + */ + setupKeyboardNavigation() { + // Reset focused index when results change + this.focusedIndex = -1; + + // Use a single event listener on the document (avoiding duplicates) + if (!this.keyboardNavigationInitialized) { + this.keyboardNavigationInitialized = true; + + document.addEventListener('keydown', (e) => { + const results = this.resultsContainer.querySelectorAll('.search-result'); + if (results.length === 0) return; + + // Only handle when search area is focused + if (!this.isSearchFocused()) return; + + if (e.key === 'ArrowDown') { + e.preventDefault(); + this.focusedIndex = 
Math.min(this.focusedIndex + 1, results.length - 1); + this.focusResult(results, this.focusedIndex); + } else if (e.key === 'ArrowUp') { + e.preventDefault(); + this.focusedIndex = Math.max(this.focusedIndex - 1, 0); + this.focusResult(results, this.focusedIndex); + } else if (e.key === 'Enter' && this.focusedIndex >= 0) { + e.preventDefault(); + const link = results[this.focusedIndex].querySelector('a'); + if (link) link.click(); + } else if (e.key === 'Escape') { + this.focusedIndex = -1; + this.clearFocus(); + this.searchInput.focus(); + } + }); + } + } + + /** + * Check if search area is focused + */ + isSearchFocused() { + const active = document.activeElement; + return active === this.searchInput || + this.resultsContainer.contains(active) || + this.resultsContainer.querySelector('.focused'); + } + + /** + * Focus a specific result by index + */ + focusResult(results, index) { + this.clearFocus(); + const element = results[index]; + if (element) { + element.classList.add('focused'); + element.setAttribute('aria-selected', 'true'); + element.scrollIntoView({ behavior: 'smooth', block: 'nearest' }); + } + } + + /** + * Clear focus from all results + */ + clearFocus() { + this.resultsContainer.querySelectorAll('.search-result.focused') + .forEach(el => { + el.classList.remove('focused'); + el.setAttribute('aria-selected', 'false'); + }); + } + + handleUrlSearch() { + const urlParams = new URLSearchParams(window.location.search); + const query = urlParams.get('q'); + if (query) { + this.searchInput.value = query; + this.handleSearch(query); + } + } + + handleSearch(query) { + this.currentQuery = query.trim(); + + if (!this.currentQuery) { + this.showEmptyState(); + return; + } + + if (this.currentQuery.length < 2) { + this.showMinLengthMessage(); + return; + } + + // Perform search with filters + const results = this.searchEngine.search(this.currentQuery, this.currentFilters); + this.allResults = results; + this.displayResults(results); + + // Update URL without 
reload + const newUrl = new URL(window.location); + newUrl.searchParams.set('q', this.currentQuery); + window.history.replaceState(null, '', newUrl); + } + + displayResults(results) { + if (results.length === 0) { + this.showNoResults(); + return; + } + + const resultsHtml = results.map((result, index) => this.renderResult(result, index)).join(''); + const resultBreakdown = this.getResultBreakdown(results); + + this.resultsContainer.innerHTML = ` + +
+

Search Results

+

+ Found ${results.length} result${results.length !== 1 ? 's' : ''} for "${this.escapeHtml(this.currentQuery)}" + ${this.getActiveFiltersText()} + ${resultBreakdown ? `${resultBreakdown}` : ''} +

+
+
+ ${resultsHtml} +
+ `; + + // Setup keyboard navigation + this.setupKeyboardNavigation(); + + // Emit event for AI assistant integration + this.emitSearchAIRequest(this.currentQuery, results); + } + + /** + * Get result type breakdown for display + */ + getResultBreakdown(results) { + const byType = {}; + results.forEach(r => { + const type = r.content_type || 'Other'; + byType[type] = (byType[type] || 0) + 1; + }); + + const breakdown = Object.entries(byType) + .sort((a, b) => b[1] - a[1]) + .map(([type, count]) => `${count} ${this.escapeHtml(this.formatTypeName(type))}`) + .join(' · '); + + return breakdown; + } + + /** + * Render topic badges for a result + */ + renderTopicBadges(result) { + const topics = this.searchEngine.getDocumentTopics + ? this.searchEngine.getDocumentTopics(result) + : []; + + if (!topics || topics.length === 0) return ''; + + const topicBadges = topics.slice(0, 3).map(topic => + ` + 📁 ${this.escapeHtml(topic)} + ` + ).join(''); + + const moreBadge = topics.length > 3 + ? `+${topics.length - 3}` + : ''; + + return `
${topicBadges}${moreBadge}
`; + } + + getActiveFiltersText() { + const activeFilters = []; + + // Topic (new) or category (legacy) + const topicFilter = this.currentFilters.topic || this.currentFilters.category; + if (topicFilter) { + activeFilters.push(`Topic: ${this.formatCategoryName(topicFilter)}`); + } + if (this.currentFilters.tag) { + activeFilters.push(`Tag: ${this.currentFilters.tag}`); + } + if (this.currentFilters.type) { + activeFilters.push(`Type: ${this.formatTypeName(this.currentFilters.type)}`); + } + + // Dynamic facets + if (this.currentFilters.facets) { + Object.entries(this.currentFilters.facets).forEach(([facetKey, facetValue]) => { + if (facetValue) { + activeFilters.push(`${this.formatFacetName(facetKey)}: ${this.formatFacetValue(facetValue)}`); + } + }); + } + + return activeFilters.length > 0 ? ` (filtered by ${activeFilters.join(', ')})` : ''; + } + + renderResult(result, index) { + const title = this.highlightText(result.title, this.currentQuery); + // Use description (frontmatter) > summary > generated snippet + const snippetSource = result.description || result.summary || this.generateSnippet(result.content, this.currentQuery, 200); + const summary = this.highlightText(snippetSource || '', this.currentQuery); + const breadcrumb = this.getBreadcrumb(result.id); + const sectionInfo = this.getSectionInfo(result.id); + const matchingSections = this.renderMatchingSections(result, this.currentQuery); + const resultTags = this.renderResultTags(result); + const topicBadges = this.renderTopicBadges(result); + const metadataBadges = this.renderMetadataBadges(result); + + // Multiple matches indicator + const multipleMatchesIndicator = result.totalMatches > 1 + ? `+${result.totalMatches - 1} more matches` + : ''; + + return ` +
+
+
+ +
+
+

+ ${title} + ${multipleMatchesIndicator} +

+
+ ${breadcrumb} +
+ ${topicBadges} +
+ ${metadataBadges} +
+ ${resultTags} +
+
+
+

${summary}

+ ${matchingSections} +
+
+ `; + } + + /** + * Generate context-aware snippet around search terms + */ + generateSnippet(content, query, maxLength = 200) { + if (!content) return ''; + + // Find first occurrence of any search term + const terms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2); + const lowerContent = content.toLowerCase(); + + let startIndex = 0; + for (const term of terms) { + const idx = lowerContent.indexOf(term); + if (idx > 0) { + startIndex = Math.max(0, idx - 50); // Start 50 chars before match + break; + } + } + + // Extract snippet around match + let snippet = content.substring(startIndex, startIndex + maxLength); + + // Clean up word boundaries + if (startIndex > 0) { + const firstSpace = snippet.indexOf(' '); + if (firstSpace > 0 && firstSpace < 20) { + snippet = snippet.substring(firstSpace + 1); + } + snippet = '...' + snippet; + } + + if (startIndex + maxLength < content.length) { + const lastSpace = snippet.lastIndexOf(' '); + if (lastSpace > snippet.length - 20) { + snippet = snippet.substring(0, lastSpace); + } + snippet += '...'; + } + + return snippet; + } + + renderResultTags(result) { + const tags = this.searchEngine.getDocumentTags(result); + if (!tags || tags.length === 0) return ''; + + const tagsToShow = tags.slice(0, 6); // Show more tags since they're now on their own line + const tagsHtml = tagsToShow.map(tag => + `${tag}` + ).join(''); + + const moreText = tags.length > 6 ? `+${tags.length - 6} more` : ''; + + return `
${tagsHtml}${moreText}
`; + } + + renderResultCategories(result) { + // Use getDocumentTopics (new) which falls back to getDocumentCategories (legacy) + const topics = this.searchEngine.getDocumentTopics + ? this.searchEngine.getDocumentTopics(result) + : this.searchEngine.getDocumentCategories(result); + if (!topics || topics.length === 0) return ''; + + const topicsHtml = topics.slice(0, 2).map(topic => + `${this.formatCategoryName(topic)}` + ).join(''); + + return `
${topicsHtml}
`; + } + + renderMetadataBadges(result) { + const badges = []; + + // Audience badges (new) or personas (legacy) - render each as separate badge + const audienceField = result.audience || result.personas; + if (audienceField) { + // Parse audience list - handle array, comma-separated string, or space-separated string with known patterns + let audienceList = []; + if (Array.isArray(audienceField)) { + audienceList = audienceField; + } else if (typeof audienceField === 'string') { + // Check for comma separation first + if (audienceField.includes(',')) { + audienceList = audienceField.split(',').map(a => a.trim()).filter(Boolean); + } else { + // Try to match known audience patterns (e.g., "Technical Writer Developer" -> ["Technical Writer", "Developer"]) + const knownAudiences = ['Technical Writer', 'Developer', 'Data Scientist', 'ML Engineer', 'DevOps', 'Administrator', 'Researcher']; + const matches = []; + let remaining = audienceField; + + for (const known of knownAudiences) { + if (remaining.includes(known)) { + matches.push(known); + remaining = remaining.replace(known, '').trim(); + } + } + + audienceList = matches.length > 0 ? matches : [audienceField]; + } + } + + audienceList.forEach(audience => { + const formatted = this.formatPersonaName(audience); + badges.push(``); + }); + } + + // Difficulty badge + if (result.difficulty) { + const difficultyIcon = this.getDifficultyIcon(result.difficulty); + badges.push(``); + } + + // Dynamic facet badges + if (result.facets && typeof result.facets === 'object') { + Object.entries(result.facets).forEach(([facetKey, facetValue]) => { + if (facetValue) { + const values = Array.isArray(facetValue) ? 
facetValue : [facetValue]; + values.forEach(value => { + badges.push(``); + }); + } + }); + } + + // Legacy flat modality badge (if not in facets) + if (result.modality && (!result.facets || !result.facets.modality)) { + const modalityIcon = this.getModalityIcon(result.modality); + badges.push(``); + } + + return badges.join(''); + } + + getDifficultyIcon(difficulty) { + switch (difficulty.toLowerCase()) { + case 'beginner': return '🔰'; + case 'intermediate': return '📊'; + case 'advanced': return '🚀'; + case 'reference': return '📚'; + default: return '📖'; + } + } + + getModalityIcon(modality) { + switch (modality.toLowerCase()) { + case 'text-only': return '📝'; + case 'image-only': return '🖼️'; + case 'video-only': return '🎥'; + case 'multimodal': return '🔀'; + case 'universal': return '🌐'; + default: return '📄'; + } + } + + renderMatchingSections(result, query) { + if (!result.matchingSections || result.matchingSections.length <= 1) { + return ''; + } + + const sectionsToShow = result.matchingSections.slice(0, 5); + const hasMore = result.matchingSections.length > 5; + + const sectionsHtml = sectionsToShow.map(section => { + const sectionIcon = this.getSectionIcon(section.type, section.level); + const sectionText = this.highlightText(section.text, query); + const anchor = section.anchor ? `#${section.anchor}` : ''; + const sectionUrl = this.getDocumentUrl(result) + anchor; + + return ` + + ${sectionIcon} + ${sectionText} + + + `; + }).join(''); + + const moreIndicator = hasMore ? ` +
+ + +${result.matchingSections.length - 5} more sections +
+ ` : ''; + + return ` +
+
+ + Matching sections: +
+ +
+ `; + } + + getSectionIcon(type, level) { + switch (type) { + case 'title': + return ''; + case 'heading': + if (level <= 2) return ''; + if (level <= 4) return ''; + return ''; + case 'content': + return ''; + default: + return ''; + } + } + + getBreadcrumb(docId) { + const parts = docId.split('/').filter(part => part && part !== 'index'); + return parts.length > 0 ? parts.join(' › ') : 'Home'; + } + + getSectionInfo(docId) { + const path = docId.toLowerCase(); + + if (path.includes('get-started') || path.includes('getting-started')) { + return { + class: 'getting-started', + icon: 'fas fa-rocket', + label: 'Getting Started' + }; + } else if (path.includes('admin')) { + return { + class: 'admin', + icon: 'fas fa-cog', + label: 'Administration' + }; + } else if (path.includes('reference') || path.includes('api')) { + return { + class: 'reference', + icon: 'fas fa-book', + label: 'Reference' + }; + } else if (path.includes('about') || path.includes('concepts')) { + return { + class: 'about', + icon: 'fas fa-info-circle', + label: 'About' + }; + } else if (path.includes('tutorial')) { + return { + class: 'tutorial', + icon: 'fas fa-graduation-cap', + label: 'Tutorial' + }; + } else { + return { + class: 'default', + icon: 'fas fa-file-lines', + label: 'Documentation' + }; + } + } + + getDocumentUrl(result) { + if (result.url) { + return result.url; + } + return `${result.id.replace(/^\/+/, '')}.html`; + } + + highlightText(text, query) { + if (!query) return this.escapeHtml(text); + + const terms = query.toLowerCase().split(/\s+/).filter(term => term.length > 1); + let highlightedText = this.escapeHtml(text); + + terms.forEach(term => { + const regex = new RegExp(`(${this.escapeRegex(term)})`, 'gi'); + highlightedText = highlightedText.replace(regex, '$1'); + }); + + return highlightedText; + } + + showEmptyState() { + this.resultsContainer.innerHTML = ` +
+ +

Search Documentation

+

Start typing to search across all documentation pages...

+
+ + + Search Tips: Use specific terms for better results • Use filters to narrow down results • Search includes titles, content, and headings + +
+
+ `; + } + + showMinLengthMessage() { + this.resultsContainer.innerHTML = ` +
+ +

Keep typing...

+

Enter at least 2 characters to search

+
+ `; + } + + showNoResults() { + const filtersActive = this.currentFilters.topic || this.currentFilters.category || + this.currentFilters.tag || this.currentFilters.type || + (this.currentFilters.facets && Object.keys(this.currentFilters.facets).some(k => this.currentFilters.facets[k])); + const suggestionText = filtersActive + ? 'Try clearing some filters or using different keywords' + : 'Try different keywords or check your spelling'; + + this.resultsContainer.innerHTML = ` +
+ +

No results found

+

No results found for "${this.escapeHtml(this.currentQuery)}"${this.getActiveFiltersText()}

+
+ + ${suggestionText} + +
+ ${filtersActive ? ` +
+ +
+ ` : ''} +
+ `; + } + + // Utility methods + debounce(func, wait) { + let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; + } + + escapeHtml(unsafe) { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + } + + escapeRegex(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + } + + emitSearchAIRequest(query, results) { + // Emit event for AI assistant integration (search page) + const aiRequestEvent = new CustomEvent('search-ai-request', { + detail: { + query: query, + results: results, + count: results.length, + container: 'ai-assistant-container' + } + }); + document.dispatchEvent(aiRequestEvent); + + console.log(`🤖 Emitted search-ai-request event for query: "${query}" with ${results.length} results`); + } +} \ No newline at end of file diff --git a/docs/_extensions/search_assets/modules/Utils.js b/docs/_extensions/search_assets/modules/Utils.js new file mode 100644 index 0000000000..4651479fec --- /dev/null +++ b/docs/_extensions/search_assets/modules/Utils.js @@ -0,0 +1,148 @@ +/** + * Utils Module + * Contains utility functions used across the enhanced search system + */ + +class Utils { + constructor() { + // Utility class - no initialization needed + } + + /** + * Debounce function to limit rapid function calls + */ + debounce(func, wait) { + let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; + } + + /** + * Escape special regex characters + */ + escapeRegex(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + } + + /** + * Escape HTML to prevent XSS attacks + */ + escapeHtml(unsafe) { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + } + + /** + * 
Highlight search terms in text + */ + highlightText(text, query, highlightClass = 'search-highlight') { + if (!query || !text) return text; + + const terms = query.toLowerCase().split(/\s+/); + let highlighted = text; + + terms.forEach(term => { + if (term.length > 1) { + const regex = new RegExp(`(${this.escapeRegex(term)})`, 'gi'); + highlighted = highlighted.replace(regex, `$1`); + } + }); + + return highlighted; + } + + /** + * Generate breadcrumb from document ID + */ + generateBreadcrumb(docId) { + const parts = docId.split('/').filter(part => part && part !== 'index'); + return parts.length > 0 ? parts.join(' › ') : 'Home'; + } + + /** + * Generate anchor link from heading text (Sphinx-style) + */ + generateAnchor(headingText) { + return headingText + .toLowerCase() + .replace(/[^\w\s-]/g, '') // Remove special chars + .replace(/\s+/g, '-') // Replace spaces with hyphens + .trim(); + } + + /** + * Get document URL from result object + */ + getDocumentUrl(result) { + if (result.url) { + return result.url; + } + return `${result.id.replace(/^\/+/, '')}.html`; + } + + /** + * Get appropriate icon for section type + */ + getSectionIcon(type, level) { + switch (type) { + case 'title': + return ''; + case 'heading': + if (level <= 2) return ''; + if (level <= 4) return ''; + return ''; + case 'content': + return ''; + default: + return ''; + } + } + + /** + * Load external script (like Lunr.js) + */ + async loadScript(src) { + return new Promise((resolve, reject) => { + const script = document.createElement('script'); + script.src = src; + script.onload = resolve; + script.onerror = reject; + document.head.appendChild(script); + }); + } + + /** + * Safe substring with fallback + */ + safeSubstring(str, maxLength = 200, fallback = '') { + if (!str) return fallback; + return str.length > maxLength ? 
str.substring(0, maxLength) : str; + } + + /** + * Check if string is valid and not empty + */ + isValidString(str) { + return typeof str === 'string' && str.trim().length > 0; + } + + /** + * Safe array access with fallback + */ + safeArray(arr, fallback = []) { + return Array.isArray(arr) ? arr : fallback; + } +} + +// Make Utils available globally +window.Utils = Utils; \ No newline at end of file diff --git a/docs/_extensions/search_assets/templates/search.html b/docs/_extensions/search_assets/templates/search.html new file mode 100644 index 0000000000..ed4a5b4e4d --- /dev/null +++ b/docs/_extensions/search_assets/templates/search.html @@ -0,0 +1,49 @@ +{%- extends "page.html" %} +{# Enhanced Search Page - Clean template without embedded CSS/JS #} + +{% block docs_body %} +
+

{{ _("Search") }}

+ + + + {# Search and filter controls container - will be enhanced by JavaScript #} +
+ +
+ + {# Search results container #} +
+
+ +

Search Documentation

+

Start typing to search across all documentation pages...

+
+ + + Search Tips: Use specific terms for better results • Search includes titles, content, and + headings + +
+
+
+
+{% endblock docs_body %} + +{# Page metadata #} +{%- block htmltitle -%} +{{ _("Search") }} - {{ title or docstitle }} +{%- endblock htmltitle -%} + +{# Load our enhanced search scripts #} +{% block scripts -%} +{{ super() }} +{# Search page script is loaded via html_js_files in conf.py #} +{%- endblock scripts %} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 9efa8a014a..d2f55632b2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -41,6 +41,7 @@ "sphinx_design", "sphinxcontrib.mermaid", "json_output", + "search_assets", # Enhanced search assets extension ] redirects = { From 118fe5d6e63fef0d3dd02673e668cc31516b00ad Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Fri, 16 Jan 2026 12:15:13 -0800 Subject: [PATCH 5/6] pre-commit fixes --- docs/_extensions/json_output/__init__.py | 15 ++++++++++++ docs/_extensions/json_output/config.py | 15 ++++++++++++ .../json_output/content/__init__.py | 15 ++++++++++++ .../json_output/content/extractor.py | 15 ++++++++++++ .../json_output/content/metadata.py | 15 ++++++++++++ .../json_output/content/structured.py | 15 ++++++++++++ docs/_extensions/json_output/content/text.py | 15 ++++++++++++ docs/_extensions/json_output/core/__init__.py | 15 ++++++++++++ docs/_extensions/json_output/core/builder.py | 16 ++++++++++++- .../json_output/core/document_discovery.py | 15 ++++++++++++ .../json_output/core/global_metadata.py | 22 +++++++++++------ .../json_output/core/hierarchy_builder.py | 15 ++++++++++++ .../json_output/core/json_formatter.py | 24 +++++++++++++++---- .../json_output/core/json_writer.py | 15 ++++++++++++ .../json_output/processing/__init__.py | 15 ++++++++++++ .../json_output/processing/cache.py | 15 ++++++++++++ .../json_output/processing/processor.py | 15 ++++++++++++ docs/_extensions/json_output/utils.py | 15 ++++++++++++ docs/_extensions/search_assets/__init__.py | 15 ++++++++++++ 19 files changed, 289 insertions(+), 13 deletions(-) diff --git a/docs/_extensions/json_output/__init__.py 
b/docs/_extensions/json_output/__init__.py index 1a821cbdc2..447af75b13 100644 --- a/docs/_extensions/json_output/__init__.py +++ b/docs/_extensions/json_output/__init__.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Sphinx extension to generate JSON output for every page alongside HTML output. diff --git a/docs/_extensions/json_output/config.py b/docs/_extensions/json_output/config.py index 4f9a102e6f..de9e3315a4 100644 --- a/docs/_extensions/json_output/config.py +++ b/docs/_extensions/json_output/config.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Configuration management for JSON output extension.""" from typing import Any diff --git a/docs/_extensions/json_output/content/__init__.py b/docs/_extensions/json_output/content/__init__.py index 0eee0b4920..abc7b45ee7 100644 --- a/docs/_extensions/json_output/content/__init__.py +++ b/docs/_extensions/json_output/content/__init__.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Content extraction functions for JSON output.""" from .extractor import extract_document_content diff --git a/docs/_extensions/json_output/content/extractor.py b/docs/_extensions/json_output/content/extractor.py index d8e88f03e3..9cf975650a 100644 --- a/docs/_extensions/json_output/content/extractor.py +++ b/docs/_extensions/json_output/content/extractor.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Main content extraction orchestration.""" from typing import Any diff --git a/docs/_extensions/json_output/content/metadata.py b/docs/_extensions/json_output/content/metadata.py index cb21a6052f..03c543d961 100644 --- a/docs/_extensions/json_output/content/metadata.py +++ b/docs/_extensions/json_output/content/metadata.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Metadata and frontmatter extraction functions.""" from typing import Any diff --git a/docs/_extensions/json_output/content/structured.py b/docs/_extensions/json_output/content/structured.py index 0742c324aa..413810fc57 100644 --- a/docs/_extensions/json_output/content/structured.py +++ b/docs/_extensions/json_output/content/structured.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Structured content extraction functions for headings, code blocks, links, and images.""" import re diff --git a/docs/_extensions/json_output/content/text.py b/docs/_extensions/json_output/content/text.py index 5f810915a3..6e91afe71b 100644 --- a/docs/_extensions/json_output/content/text.py +++ b/docs/_extensions/json_output/content/text.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Text content extraction functions.""" import re diff --git a/docs/_extensions/json_output/core/__init__.py b/docs/_extensions/json_output/core/__init__.py index cc699ee8cc..b1512c1130 100644 --- a/docs/_extensions/json_output/core/__init__.py +++ b/docs/_extensions/json_output/core/__init__.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Core JSON output generation components.""" from .builder import JSONOutputBuilder diff --git a/docs/_extensions/json_output/core/builder.py b/docs/_extensions/json_output/core/builder.py index 3f13b2dda6..2652b9493c 100644 --- a/docs/_extensions/json_output/core/builder.py +++ b/docs/_extensions/json_output/core/builder.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + """JSONOutputBuilder class for handling JSON output generation.""" from typing import Any @@ -9,7 +24,6 @@ from ..content import extract_document_metadata as _extract_document_metadata from ..processing.cache import JSONOutputCache from ..utils import get_setting, should_generate_json - from .document_discovery import DocumentDiscovery from .hierarchy_builder import HierarchyBuilder from .json_formatter import JSONFormatter diff --git a/docs/_extensions/json_output/core/document_discovery.py b/docs/_extensions/json_output/core/document_discovery.py index 02f1fbd055..3dc255ba8d 100644 --- a/docs/_extensions/json_output/core/document_discovery.py +++ b/docs/_extensions/json_output/core/document_discovery.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Document discovery and filtering functionality.""" from typing import TYPE_CHECKING diff --git a/docs/_extensions/json_output/core/global_metadata.py b/docs/_extensions/json_output/core/global_metadata.py index 9644ea3dd3..83430afd2e 100644 --- a/docs/_extensions/json_output/core/global_metadata.py +++ b/docs/_extensions/json_output/core/global_metadata.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Global metadata extraction from Sphinx configuration. This module provides functions to extract and build global metadata @@ -138,10 +153,3 @@ def _extract_product_name(project: str) -> str: name = re.sub(suffix, "", name, flags=re.IGNORECASE) return name.strip() - - - - - - - diff --git a/docs/_extensions/json_output/core/hierarchy_builder.py b/docs/_extensions/json_output/core/hierarchy_builder.py index d3604b99a9..8bd53c1358 100644 --- a/docs/_extensions/json_output/core/hierarchy_builder.py +++ b/docs/_extensions/json_output/core/hierarchy_builder.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Hierarchy building for complex document structures like main index.""" from typing import TYPE_CHECKING, Any diff --git a/docs/_extensions/json_output/core/json_formatter.py b/docs/_extensions/json_output/core/json_formatter.py index d312dbfce8..250451a31c 100644 --- a/docs/_extensions/json_output/core/json_formatter.py +++ b/docs/_extensions/json_output/core/json_formatter.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """JSON data formatting and structure building.""" from datetime import datetime, timezone @@ -8,7 +23,6 @@ from sphinx.util import logging from ..utils import get_document_url, get_setting - from .document_discovery import DocumentDiscovery from .global_metadata import get_global_metadata @@ -180,9 +194,7 @@ def _add_global_metadata(self, data: dict[str, Any]) -> None: if value: # Only add non-empty values data[key] = value - def _add_content_fields( - self, data: dict[str, Any], content_data: dict[str, Any], docname: str, title: str - ) -> None: + def _add_content_fields(self, data: dict[str, Any], content_data: dict[str, Any], docname: str, title: str) -> None: """Add content-related fields to JSON data.""" self._add_primary_content(data, content_data) self._add_summary_content(data, content_data) @@ -238,7 +250,9 @@ def _add_optional_features(self, data: dict[str, Any], content_data: dict[str, A if get_setting(self.config, "extract_keywords", True) and "keywords" in content_data: keywords_max_count = get_setting(self.config, "keywords_max_count", 50) keywords = ( - content_data["keywords"][:keywords_max_count] if keywords_max_count > 0 else content_data["keywords"] + content_data["keywords"][:keywords_max_count] + if keywords_max_count > 0 + else content_data["keywords"] ) data["keywords"] = keywords diff --git a/docs/_extensions/json_output/core/json_writer.py b/docs/_extensions/json_output/core/json_writer.py index 1bfdbae3b5..14eea68d1d 100644 --- a/docs/_extensions/json_output/core/json_writer.py +++ b/docs/_extensions/json_output/core/json_writer.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """JSON file writing and output operations.""" import json diff --git a/docs/_extensions/json_output/processing/__init__.py b/docs/_extensions/json_output/processing/__init__.py index 7e0a1fdd6b..f003040578 100644 --- a/docs/_extensions/json_output/processing/__init__.py +++ b/docs/_extensions/json_output/processing/__init__.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Processing pipeline and orchestration components.""" from .cache import JSONOutputCache diff --git a/docs/_extensions/json_output/processing/cache.py b/docs/_extensions/json_output/processing/cache.py index ce79d8c70a..bc397dcf1b 100644 --- a/docs/_extensions/json_output/processing/cache.py +++ b/docs/_extensions/json_output/processing/cache.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Caching and incremental build support for JSON output extension.""" from collections.abc import Callable diff --git a/docs/_extensions/json_output/processing/processor.py b/docs/_extensions/json_output/processing/processor.py index cb3ec29fc3..357fe83ff9 100644 --- a/docs/_extensions/json_output/processing/processor.py +++ b/docs/_extensions/json_output/processing/processor.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Document processing and build orchestration for JSON output extension.""" import multiprocessing diff --git a/docs/_extensions/json_output/utils.py b/docs/_extensions/json_output/utils.py index 17ee6ee5ae..43fbc044b1 100644 --- a/docs/_extensions/json_output/utils.py +++ b/docs/_extensions/json_output/utils.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Utility functions for JSON output.""" import fnmatch diff --git a/docs/_extensions/search_assets/__init__.py b/docs/_extensions/search_assets/__init__.py index 08cac4e9da..032e149a71 100644 --- a/docs/_extensions/search_assets/__init__.py +++ b/docs/_extensions/search_assets/__init__.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + """ Enhanced Search Extension for Sphinx Provides enhanced search page functionality without interfering with default search From 95ebc4b8c70bd209dad0aa9c8e391206e290bbb0 Mon Sep 17 00:00:00 2001 From: Miyoung Choi Date: Fri, 16 Jan 2026 12:22:03 -0800 Subject: [PATCH 6/6] precommit --- .../search_assets/enhanced-search.css | 2 +- docs/_extensions/search_assets/main.js | 78 +++--- .../search_assets/modules/DocumentLoader.js | 58 ++--- .../search_assets/modules/EventHandler.js | 88 +++---- .../search_assets/modules/ResultRenderer.js | 78 +++--- .../search_assets/modules/SearchEngine.js | 222 +++++++++--------- .../search_assets/modules/SearchInterface.js | 150 ++++++------ .../modules/SearchPageManager.js | 16 +- .../search_assets/modules/Utils.js | 32 +-- .../search_assets/templates/search.html | 2 +- 10 files changed, 363 insertions(+), 363 deletions(-) diff --git a/docs/_extensions/search_assets/enhanced-search.css b/docs/_extensions/search_assets/enhanced-search.css index ec61338c25..6ae98c88cb 100644 --- a/docs/_extensions/search_assets/enhanced-search.css +++ b/docs/_extensions/search_assets/enhanced-search.css @@ -1367,4 +1367,4 @@ html[data-theme="dark"] .filter-select-wrapper.has-value { .search-result.focused { transition: none; } -} \ No newline at end of file +} diff --git a/docs/_extensions/search_assets/main.js b/docs/_extensions/search_assets/main.js index 2fc23965eb..31140ef1f5 100644 --- a/docs/_extensions/search_assets/main.js +++ b/docs/_extensions/search_assets/main.js @@ -18,72 +18,72 @@ class EnhancedSearch { highlightClass: 'search-highlight', ...options }; - + this.isLoaded = false; - + // Module instances this.documentLoader = null; this.searchEngine = null; this.searchPageManager = null; this.utils = null; - + this.init(); } - + async init() { try { // Load required modules await this.loadModules(); - + // Initialize core modules 
this.utils = new Utils(); this.documentLoader = new DocumentLoader(); this.searchEngine = new SearchEngine(this.utils); - + // Load documents and initialize search engine (always needed) await this.documentLoader.loadDocuments(); await this.searchEngine.initialize(this.documentLoader.getDocuments()); - + // Check if we're on the search page const isSearchPage = this.isSearchPage(); - + if (isSearchPage) { this.searchPageManager = new SearchPageManager(); } - + this.isLoaded = true; } catch (error) { this.fallbackToDefaultSearch(); } } - + isSearchPage() { - return window.location.pathname.includes('/search') || + return window.location.pathname.includes('/search') || window.location.pathname.includes('/search.html') || window.location.pathname.endsWith('search/') || document.querySelector('#enhanced-search-page-input') !== null || document.querySelector('#enhanced-search-page-results') !== null; } - + async loadModules() { const moduleNames = [ 'Utils', - 'DocumentLoader', + 'DocumentLoader', 'SearchEngine', 'SearchPageManager' ]; - + // Load modules with smart path resolution - const modulePromises = moduleNames.map(name => + const modulePromises = moduleNames.map(name => this.loadModuleWithFallback(name) ); - + await Promise.all(modulePromises); } - + async loadModuleWithFallback(moduleName) { const possiblePaths = this.getModulePaths(moduleName); - + for (const path of possiblePaths) { try { await this.loadModule(path); @@ -92,51 +92,51 @@ class EnhancedSearch { // Continue to next path } } - + throw new Error(`Failed to load module ${moduleName} from any path`); } - + getModulePaths(moduleName) { const fileName = `${moduleName}.js`; - + // Calculate nesting level to determine correct _static path const pathParts = window.location.pathname.split('/').filter(part => part.length > 0); const htmlFile = pathParts[pathParts.length - 1]; - + // Remove the HTML file from the count if it exists let nestingLevel = pathParts.length; if (htmlFile && 
htmlFile.endsWith('.html')) { nestingLevel--; } - + // Build the correct _static path based on nesting level const staticPrefix = nestingLevel > 0 ? '../'.repeat(nestingLevel) : './'; const staticPath = `${staticPrefix}_static`; - + // Search assets only has modules directory const moduleDir = 'modules'; - + // Generate paths in order of likelihood const paths = []; - + // 1. Most likely path based on calculated nesting paths.push(`${staticPath}/${moduleDir}/${fileName}`); - + // 2. Fallback static paths (for different nesting scenarios) paths.push(`_static/${moduleDir}/${fileName}`); paths.push(`./_static/${moduleDir}/${fileName}`); if (nestingLevel > 1) { paths.push(`../_static/${moduleDir}/${fileName}`); } - + // 3. Legacy fallback paths paths.push(`./modules/${fileName}`); paths.push(`../modules/${fileName}`); paths.push(`modules/${fileName}`); - + return paths; } - + async loadModule(src) { return new Promise((resolve, reject) => { const script = document.createElement('script'); @@ -146,37 +146,37 @@ class EnhancedSearch { document.head.appendChild(script); }); } - + // Public API methods search(query) { if (!this.searchEngine) { return []; } - + return this.searchEngine.search(query); } - + renderResults(results, query) { // Use SearchPageManager for search page rendering return ''; } - + fallbackToDefaultSearch() { // Don't interfere with default search - just fallback } - + getDocuments() { return this.documentLoader ? 
this.documentLoader.getDocuments() : []; } - + get documents() { return this.getDocuments(); } - + getSearchEngine() { return this.searchEngine; } - + getOptions() { return this.options; } @@ -194,4 +194,4 @@ document.addEventListener('DOMContentLoaded', function() { }); }); -} // End of duplicate prevention check \ No newline at end of file +} // End of duplicate prevention check diff --git a/docs/_extensions/search_assets/modules/DocumentLoader.js b/docs/_extensions/search_assets/modules/DocumentLoader.js index 68f31a27b9..a15e55c1f0 100644 --- a/docs/_extensions/search_assets/modules/DocumentLoader.js +++ b/docs/_extensions/search_assets/modules/DocumentLoader.js @@ -8,7 +8,7 @@ class DocumentLoader { this.documents = {}; this.isLoaded = false; } - + /** * Load documents from JSON index files */ @@ -23,7 +23,7 @@ class DocumentLoader { throw error; } } - + /** * Fetch document data from various possible paths */ @@ -31,11 +31,11 @@ class DocumentLoader { // Try different paths to account for different page depths const possiblePaths = [ './index.json', - '../index.json', + '../index.json', '../../index.json', '../../../index.json' ]; - + for (const path of possiblePaths) { try { const response = await fetch(path); @@ -48,10 +48,10 @@ class DocumentLoader { console.log(`❌ Failed to load from ${path}: ${error.message}`); } } - + throw new Error('Failed to load search data from any path'); } - + /** * Process and filter documents from raw data * Supports three formats: @@ -71,29 +71,29 @@ class DocumentLoader { // Fallback: single document allDocs = [data]; } - + // Filter out problematic documents const filteredDocs = allDocs.filter(doc => this.isValidDocument(doc)); - + // Store documents by ID filteredDocs.forEach(doc => { this.documents[doc.id] = this.sanitizeDocument(doc); }); - + console.log(`Processed ${filteredDocs.length} documents (filtered from ${allDocs.length} total)`); } - + /** * Check if a document is valid for indexing */ isValidDocument(doc) { 
const docId = doc.id || ''; - return !docId.toLowerCase().includes('readme') && - !docId.startsWith('_') && - doc.title && + return !docId.toLowerCase().includes('readme') && + !docId.startsWith('_') && + doc.title && doc.content; } - + /** * Sanitize document content for safe indexing * Supports both new schema fields and legacy fields @@ -122,20 +122,20 @@ class DocumentLoader { section_path: this.sanitizeArray(doc.section_path, 200), author: this.sanitizeText(doc.author, 100) }; - + // Preserve facets object (dynamic, user-defined keys) if (doc.facets && typeof doc.facets === 'object') { sanitized.facets = this.sanitizeFacets(doc.facets); } - + // Preserve legacy flat modality if present and no facets.modality if (doc.modality && (!doc.facets || !doc.facets.modality)) { sanitized.modality = this.sanitizeText(doc.modality, 50); } - + return sanitized; } - + /** * Sanitize facets object (dynamic keys with string or array values) */ @@ -150,7 +150,7 @@ class DocumentLoader { }); return sanitized; } - + /** * Sanitize text content with length limits */ @@ -158,7 +158,7 @@ class DocumentLoader { if (!text || typeof text !== 'string') return ''; return text.substring(0, maxLength); } - + /** * Sanitize array content */ @@ -166,7 +166,7 @@ class DocumentLoader { if (!Array.isArray(arr)) return []; return arr.map(item => String(item)).join(' ').substring(0, maxLength); } - + /** * Sanitize headings array */ @@ -177,49 +177,49 @@ class DocumentLoader { level: Number(heading.level) || 1 })); } - + /** * Get all loaded documents */ getDocuments() { return this.documents; } - + /** * Get a specific document by ID */ getDocument(id) { return this.documents[id]; } - + /** * Get document count */ getDocumentCount() { return Object.keys(this.documents).length; } - + /** * Check if documents are loaded */ isReady() { return this.isLoaded && Object.keys(this.documents).length > 0; } - + /** * Get documents as array for indexing */ getDocumentsArray() { return 
Object.values(this.documents); } - + /** * Filter documents by criteria */ filterDocuments(filterFn) { return this.getDocumentsArray().filter(filterFn); } - + /** * Get document statistics */ @@ -236,4 +236,4 @@ class DocumentLoader { } // Make DocumentLoader available globally -window.DocumentLoader = DocumentLoader; \ No newline at end of file +window.DocumentLoader = DocumentLoader; diff --git a/docs/_extensions/search_assets/modules/EventHandler.js b/docs/_extensions/search_assets/modules/EventHandler.js index f1981c4748..31cba430f6 100644 --- a/docs/_extensions/search_assets/modules/EventHandler.js +++ b/docs/_extensions/search_assets/modules/EventHandler.js @@ -10,14 +10,14 @@ class EventHandler { this.resultRenderer = enhancedSearch.resultRenderer; this.searchEngine = enhancedSearch.searchEngine; this.utils = enhancedSearch.utils; - + // Track bound event listeners for cleanup this.boundListeners = new Map(); - + // Debounced search function this.debouncedSearch = this.utils.debounce(this.handleSearch.bind(this), 200); } - + /** * Bind all event listeners */ @@ -27,25 +27,25 @@ class EventHandler { this.bindGlobalEvents(); console.log('✅ Event handlers bound'); } - + /** * Bind input-related events */ bindInputEvents() { const input = this.searchInterface.getInput(); if (!input) return; - + // Search input const inputHandler = (e) => this.debouncedSearch(e); input.addEventListener('input', inputHandler); this.boundListeners.set('input', inputHandler); - + // Keyboard navigation const keydownHandler = (e) => this.handleKeyDown(e); input.addEventListener('keydown', keydownHandler); this.boundListeners.set('keydown', keydownHandler); } - + /** * Bind page-specific events (replaces modal events) */ @@ -54,11 +54,11 @@ class EventHandler { if (!this.searchInterface.isSearchPage()) { return; } - + // Get query parameter if we're on search page const urlParams = new URLSearchParams(window.location.search); const query = urlParams.get('q'); - + if (query) { // 
Perform search immediately with the query from URL setTimeout(() => { @@ -70,7 +70,7 @@ class EventHandler { }, 100); } } - + /** * Bind global keyboard shortcuts */ @@ -90,77 +90,77 @@ class EventHandler { return; } }; - + document.addEventListener('keydown', globalKeyHandler); this.boundListeners.set('global', globalKeyHandler); } - + /** * Handle search input */ async handleSearch(event) { const query = event.target.value.trim(); const resultsContainer = this.searchInterface.getResultsContainer(); - + if (query.length < this.enhancedSearch.options.minQueryLength) { this.searchInterface.showEmptyState(); this.searchInterface.clearStats(); return; } - + try { // Show loading state this.resultRenderer.renderLoading(resultsContainer); - + // Perform search const results = this.searchEngine.search(query, this.enhancedSearch.options.maxResults); const count = results.length; - + // Render results this.resultRenderer.render(results, query, resultsContainer); - + // Update stats this.searchInterface.updateStats(query, count); - + // Emit search event for AI Assistant extension if available this.emitSearchEvent(query, results, count); - + } catch (error) { console.error('Search error:', error); this.resultRenderer.renderError(resultsContainer, 'Search temporarily unavailable'); this.searchInterface.clearStats(); } } - + /** * Handle keyboard navigation */ handleKeyDown(event) { const resultsContainer = this.searchInterface.getResultsContainer(); - + switch (event.key) { case 'ArrowDown': event.preventDefault(); this.resultRenderer.selectNext(resultsContainer); break; - + case 'ArrowUp': event.preventDefault(); this.resultRenderer.selectPrevious(resultsContainer); break; - + case 'Enter': event.preventDefault(); this.resultRenderer.activateSelected(resultsContainer); break; - + case 'Escape': event.preventDefault(); this.enhancedSearch.hide(); break; } } - + /** * Emit search event for other extensions */ @@ -172,7 +172,7 @@ class EventHandler { 
document.dispatchEvent(searchEvent); } } - + /** * Handle window resize */ @@ -183,7 +183,7 @@ class EventHandler { // Could add responsive adjustments here } } - + /** * Handle focus management */ @@ -194,10 +194,10 @@ class EventHandler { const focusableElements = modal.querySelectorAll( 'button, input, select, textarea, [tabindex]:not([tabindex="-1"])' ); - + const firstFocusable = focusableElements[0]; const lastFocusable = focusableElements[focusableElements.length - 1]; - + if (event.key === 'Tab') { if (event.shiftKey) { // Shift + Tab @@ -215,7 +215,7 @@ class EventHandler { } } } - + /** * Bind additional event listeners */ @@ -224,13 +224,13 @@ class EventHandler { const resizeHandler = this.utils.debounce(() => this.handleResize(), 100); window.addEventListener('resize', resizeHandler); this.boundListeners.set('resize', resizeHandler); - + // Focus trap const focusHandler = (e) => this.handleFocus(e); document.addEventListener('keydown', focusHandler); this.boundListeners.set('focus', focusHandler); } - + /** * Unbind all event listeners */ @@ -241,37 +241,37 @@ class EventHandler { input.removeEventListener('input', this.boundListeners.get('input')); input.removeEventListener('keydown', this.boundListeners.get('keydown')); } - + // Remove modal events const closeBtn = this.searchInterface.getCloseButton(); if (closeBtn && this.boundListeners.has('close')) { closeBtn.removeEventListener('click', this.boundListeners.get('close')); } - + const backdrop = this.searchInterface.getBackdrop(); if (backdrop && this.boundListeners.has('backdrop')) { backdrop.removeEventListener('click', this.boundListeners.get('backdrop')); } - + // Remove global events if (this.boundListeners.has('global')) { document.removeEventListener('keydown', this.boundListeners.get('global')); } - + if (this.boundListeners.has('resize')) { window.removeEventListener('resize', this.boundListeners.get('resize')); } - + if (this.boundListeners.has('focus')) { 
document.removeEventListener('keydown', this.boundListeners.get('focus')); } - + // Clear listeners map this.boundListeners.clear(); - + console.log('✅ Event handlers unbound'); } - + /** * Get event handler statistics */ @@ -283,16 +283,16 @@ class EventHandler { hasModal: !!this.searchInterface.getModal() }; } - + /** * Check if events are properly bound */ isReady() { - return this.boundListeners.size > 0 && - this.searchInterface.getInput() !== null && + return this.boundListeners.size > 0 && + this.searchInterface.getInput() !== null && this.searchInterface.getModal() !== null; } } // Make EventHandler available globally -window.EventHandler = EventHandler; \ No newline at end of file +window.EventHandler = EventHandler; diff --git a/docs/_extensions/search_assets/modules/ResultRenderer.js b/docs/_extensions/search_assets/modules/ResultRenderer.js index 1a963da966..5a173a2414 100644 --- a/docs/_extensions/search_assets/modules/ResultRenderer.js +++ b/docs/_extensions/search_assets/modules/ResultRenderer.js @@ -8,7 +8,7 @@ class ResultRenderer { this.options = options; this.utils = utils; } - + /** * Render search results */ @@ -17,23 +17,23 @@ class ResultRenderer { console.warn('No container provided for rendering results'); return; } - + if (results.length === 0) { container.innerHTML = this.renderNoResults(query); return; } - + const html = results.map((result, index) => { const isSelected = index === 0; return this.renderResultItem(result, query, isSelected); }).join(''); - + container.innerHTML = `
${html}
`; - + // Bind click events this.bindResultEvents(container, results); } - + /** * Render a single result item */ @@ -41,15 +41,15 @@ class ResultRenderer { const title = this.utils.highlightText(result.title || 'Untitled', query); const summary = this.utils.highlightText(result.summary || result.content?.substring(0, 200) || '', query); const breadcrumb = this.utils.generateBreadcrumb(result.id); - + // Render matching sections const sectionsHtml = this.renderMatchingSections(result, query); - + // Show multiple matches indicator - const multipleMatchesIndicator = result.totalMatches > 1 + const multipleMatchesIndicator = result.totalMatches > 1 ? `${result.totalMatches} matches` : ''; - + return `
@@ -67,7 +67,7 @@ class ResultRenderer {
`; } - + /** * Render matching sections within a result */ @@ -75,27 +75,27 @@ class ResultRenderer { if (!result.matchingSections || result.matchingSections.length <= 1) { return ''; } - + // Show only the first few sections to avoid overwhelming const sectionsToShow = result.matchingSections.slice(0, 4); const hasMore = result.matchingSections.length > 4; - + const sectionsHtml = sectionsToShow.map(section => { const icon = this.utils.getSectionIcon(section.type, section.level); const sectionText = this.utils.highlightText(section.text, query); const anchor = section.anchor ? `#${section.anchor}` : ''; - + return `
${icon} ${sectionText}
`; }).join(''); - - const moreIndicator = hasMore + + const moreIndicator = hasMore ? `
+${result.matchingSections.length - 4} more sections
` : ''; - + return `
${sectionsHtml} @@ -103,7 +103,7 @@ class ResultRenderer {
`; } - + /** * Render no results state */ @@ -123,25 +123,25 @@ class ResultRenderer {
`; } - + /** * Bind click events to result items */ bindResultEvents(container, results) { container.querySelectorAll('.search-result-item').forEach((item, index) => { const result = results[index]; - + // Main item click - go to document item.addEventListener('click', (e) => { // Don't trigger if clicking on a section if (e.target.closest('.search-result-section')) { return; } - + const url = item.dataset.url; window.location.href = url; }); - + // Section clicks - go to specific section item.querySelectorAll('.search-result-section').forEach(sectionEl => { sectionEl.addEventListener('click', (e) => { @@ -153,65 +153,65 @@ class ResultRenderer { }); }); } - + /** * Get result items from container */ getResultItems(container) { return container.querySelectorAll('.search-result-item'); } - + /** * Get selected result item */ getSelectedResult(container) { return container.querySelector('.search-result-item.selected'); } - + /** * Select next result item */ selectNext(container) { const results = this.getResultItems(container); const selected = this.getSelectedResult(container); - + if (results.length === 0) return; - + if (!selected) { results[0].classList.add('selected'); return; } - + const currentIndex = Array.from(results).indexOf(selected); selected.classList.remove('selected'); - + const nextIndex = (currentIndex + 1) % results.length; results[nextIndex].classList.add('selected'); results[nextIndex].scrollIntoView({ block: 'nearest' }); } - + /** * Select previous result item */ selectPrevious(container) { const results = this.getResultItems(container); const selected = this.getSelectedResult(container); - + if (results.length === 0) return; - + if (!selected) { results[results.length - 1].classList.add('selected'); return; } - + const currentIndex = Array.from(results).indexOf(selected); selected.classList.remove('selected'); - + const prevIndex = currentIndex === 0 ? 
results.length - 1 : currentIndex - 1; results[prevIndex].classList.add('selected'); results[prevIndex].scrollIntoView({ block: 'nearest' }); } - + /** * Activate selected result */ @@ -221,7 +221,7 @@ class ResultRenderer { selected.click(); } } - + /** * Clear all selections */ @@ -229,7 +229,7 @@ class ResultRenderer { const results = this.getResultItems(container); results.forEach(result => result.classList.remove('selected')); } - + /** * Render loading state */ @@ -243,7 +243,7 @@ class ResultRenderer { `; } } - + /** * Render error state */ @@ -260,4 +260,4 @@ class ResultRenderer { } // Make ResultRenderer available globally -window.ResultRenderer = ResultRenderer; \ No newline at end of file +window.ResultRenderer = ResultRenderer; diff --git a/docs/_extensions/search_assets/modules/SearchEngine.js b/docs/_extensions/search_assets/modules/SearchEngine.js index d022a8a797..c3e4b777b1 100644 --- a/docs/_extensions/search_assets/modules/SearchEngine.js +++ b/docs/_extensions/search_assets/modules/SearchEngine.js @@ -18,7 +18,7 @@ class SearchEngine { // Dynamic facets - discovered from documents, not predefined this.facets = {}; // { facetKey: Set of values } } - + /** * Initialize the search engine with documents */ @@ -33,7 +33,7 @@ class SearchEngine { throw error; } } - + /** * Collect metadata for filtering using actual frontmatter values * Supports both new schema (topics, audience) and legacy (categories, personas) @@ -47,7 +47,7 @@ class SearchEngine { this.audience = new Set(); this.difficulties = new Set(); this.facets = {}; // Reset dynamic facets - + Object.values(this.documents).forEach(doc => { // Collect topics (new schema) or categories (legacy) const topicsField = doc.topics || doc.categories; @@ -58,7 +58,7 @@ class SearchEngine { topicsField.split(',').forEach(topic => this.topics.add(topic.trim())); } } - + // Collect actual frontmatter tags if (doc.tags) { if (Array.isArray(doc.tags)) { @@ -76,10 +76,10 @@ class SearchEngine { }); } else 
if (typeof doc.tags === 'string') { // Handle both comma-separated and space-separated tags - const allTags = doc.tags.includes(',') + const allTags = doc.tags.includes(',') ? doc.tags.split(',') : doc.tags.split(' '); - + allTags.forEach(tag => { if (tag && tag.trim()) { this.tags.add(tag.trim()); @@ -87,12 +87,12 @@ class SearchEngine { }); } } - + // Use actual content_type from frontmatter (not calculated doc_type) if (doc.content_type) { this.documentTypes.add(doc.content_type); } - + // Collect audience (new schema) or personas (legacy) const audienceField = doc.audience || doc.personas; if (audienceField) { @@ -102,11 +102,11 @@ class SearchEngine { this.audience.add(audienceField); } } - + if (doc.difficulty) { this.difficulties.add(doc.difficulty); } - + // Dynamically discover all facets from documents if (doc.facets && typeof doc.facets === 'object') { Object.entries(doc.facets).forEach(([facetKey, facetValue]) => { @@ -122,7 +122,7 @@ class SearchEngine { } }); } - + // Also check for flat facet fields (legacy modality, etc.) 
// These get added to facets dynamically if (doc.modality && !this.facets.modality) { @@ -133,7 +133,7 @@ class SearchEngine { } }); } - + /** * Get available filter options using actual frontmatter taxonomy * Returns both new field names and legacy names for backwards compatibility @@ -145,7 +145,7 @@ class SearchEngine { Object.entries(this.facets).forEach(([facetKey, facetSet]) => { facetOptions[facetKey] = Array.from(facetSet).sort(); }); - + return { // New schema names topics: Array.from(this.topics).sort(), @@ -161,7 +161,7 @@ class SearchEngine { facets: facetOptions }; } - + /** * Load Lunr.js library if not already loaded */ @@ -170,11 +170,11 @@ class SearchEngine { await this.utils.loadScript('https://unpkg.com/lunr@2.3.9/lunr.min.js'); } } - + /** * Build the Lunr search index * Supports both new schema (topics, audience) and legacy (categories, personas) - * + * * Field boosting rationale: * - Title matches are almost always what users want (highest boost) * - Description (from frontmatter) is hand-crafted summary (high boost) @@ -185,26 +185,26 @@ class SearchEngine { buildIndex() { const documentsArray = Object.values(this.documents); const self = this; - + this.index = lunr(function() { // Define fields with optimized boosting for documentation search patterns this.ref('id'); - + // Primary fields - highest relevance this.field('title', { boost: 10 }); // Title matches most important this.field('description', { boost: 8 }); // Frontmatter description (hand-crafted) - + // Secondary fields - structural relevance this.field('keywords', { boost: 7 }); // Explicit keywords this.field('headings_text', { boost: 5 }); // Section headings this.field('headings', { boost: 5 }); // Section headings (legacy format) this.field('tags', { boost: 4 }); // Taxonomy tags - + // Tertiary fields - content matching this.field('summary', { boost: 3 }); // Summary field this.field('topics', { boost: 2 }); // Topic categorization this.field('content', { boost: 1 }); // 
Full content (low to prevent long docs dominating) - + // Metadata fields - filtering support this.field('content_type', { boost: 1 }); this.field('audience', { boost: 1 }); @@ -212,7 +212,7 @@ class SearchEngine { this.field('modality', { boost: 1 }); this.field('section_path', { boost: 1 }); this.field('author', { boost: 1 }); - + // Add documents to index documentsArray.forEach((doc) => { try { @@ -242,7 +242,7 @@ class SearchEngine { }, this); }); } - + /** * Convert array to string for indexing */ @@ -252,7 +252,7 @@ class SearchEngine { } return arr || ''; } - + /** * Extract text from headings array */ @@ -260,7 +260,7 @@ class SearchEngine { if (!Array.isArray(headings)) return ''; return headings.map(h => h.text || '').join(' '); } - + /** * Perform search with query and optional filters */ @@ -268,31 +268,31 @@ class SearchEngine { if (!this.isInitialized || !this.index) { return []; } - + if (!query || query.trim().length < 2) { return []; } - + try { // Enhanced search with multiple strategies const results = this.performMultiStrategySearch(query); - + // Process and enhance results const enhancedResults = this.enhanceResults(results, query); - + // Apply filters const filteredResults = this.applyFilters(enhancedResults, filters); - + // Group and rank results const groupedResults = this.groupResultsByDocument(filteredResults, query); - + return groupedResults.slice(0, maxResults); - + } catch (error) { return []; } } - + /** * Apply filters to search results * Supports both new schema (topic, audience) and legacy (category, persona) filter names @@ -308,7 +308,7 @@ class SearchEngine { return false; } } - + // Tag filter if (filters.tag && filters.tag !== '') { const docTags = this.getDocumentTags(result); @@ -316,14 +316,14 @@ class SearchEngine { return false; } } - + // Document type filter (using actual frontmatter content_type) if (filters.type && filters.type !== '') { if (result.content_type !== filters.type) { return false; } } - + // Audience 
filter (new) or persona filter (legacy) const audienceFilter = filters.audience || filters.persona; if (audienceFilter && audienceFilter !== '') { @@ -332,14 +332,14 @@ class SearchEngine { return false; } } - + // Difficulty filter if (filters.difficulty && filters.difficulty !== '') { if (result.difficulty !== filters.difficulty) { return false; } } - + // Dynamic facet filters (e.g., filters.facets = { modality: 'text-only', framework: 'pytorch' }) if (filters.facets && typeof filters.facets === 'object') { for (const [facetKey, facetValue] of Object.entries(filters.facets)) { @@ -351,7 +351,7 @@ class SearchEngine { } } } - + // Legacy flat facet filters (e.g., filters.modality directly) // Check for any filter key that matches a known facet for (const facetKey of Object.keys(this.facets)) { @@ -362,11 +362,11 @@ class SearchEngine { } } } - + return true; }); } - + /** * Get a specific facet value for a document */ @@ -383,13 +383,13 @@ class SearchEngine { } return []; } - + /** * Get topics for a document (supports new schema and legacy categories) */ getDocumentTopics(doc) { const topics = []; - + // From explicit topics (new schema) or categories (legacy) const topicsField = doc.topics || doc.categories; if (topicsField) { @@ -399,34 +399,34 @@ class SearchEngine { topics.push(...topicsField.split(',').map(t => t.trim())); } } - + // From section path if (doc.section_path && Array.isArray(doc.section_path)) { topics.push(...doc.section_path); } - + // From document ID path if (doc.id) { const pathParts = doc.id.split('/').filter(part => part && part !== 'index'); topics.push(...pathParts); } - + return [...new Set(topics)]; // Remove duplicates } - + /** * Get categories for a document (legacy alias for getDocumentTopics) */ getDocumentCategories(doc) { return this.getDocumentTopics(doc); } - + /** * Get tags for a document */ getDocumentTags(doc) { if (!doc.tags) return []; - + if (Array.isArray(doc.tags)) { // Handle array of tags that might contain 
space-separated strings const flatTags = []; @@ -444,12 +444,12 @@ class SearchEngine { }); return flatTags; } - + // Handle string tags - check for both comma and space separation if (typeof doc.tags === 'string') { const allTags = []; const tagString = doc.tags.trim(); - + if (tagString.includes(',')) { // Comma-separated tags tagString.split(',').forEach(tag => { @@ -465,14 +465,14 @@ class SearchEngine { } }); } - + return allTags; } - + return []; } - - + + /** * Get audience for a document (supports new schema and legacy personas) */ @@ -480,21 +480,21 @@ class SearchEngine { // Support both audience (new) and personas (legacy) const audienceField = doc.audience || doc.personas; if (!audienceField) return []; - + if (Array.isArray(audienceField)) { return audienceField; } - + return [audienceField]; } - + /** * Get personas for a document (legacy alias for getDocumentAudience) */ getDocumentPersonas(doc) { return this.getDocumentAudience(doc); } - + /** * Perform search with multiple strategies */ @@ -502,21 +502,21 @@ class SearchEngine { const strategies = [ // Exact phrase search with wildcards `"${query}" ${query}*`, - // Fuzzy search with wildcards + // Fuzzy search with wildcards `${query}* ${query}~2`, // Individual terms with boost query.split(/\s+/).map(term => `${term}*`).join(' '), // Fallback: just the query query ]; - + let allResults = []; const seenIds = new Set(); - + for (const strategy of strategies) { try { const results = this.index.search(strategy); - + // Add new results (avoid duplicates) results.forEach(result => { if (!seenIds.has(result.ref)) { @@ -527,40 +527,40 @@ class SearchEngine { }); } }); - + // If we have enough good results, stop if (allResults.length >= 30) break; - + } catch (strategyError) { console.warn(`Search strategy failed: ${strategy}`, strategyError); } } - + return allResults; } - + /** * Enhance search results with document data and apply re-ranking */ enhanceResults(results, query) { const queryLower = 
query.toLowerCase().trim(); const queryTerms = queryLower.split(/\s+/); - + return results.map(result => { const doc = this.documents[result.ref]; if (!doc) { console.warn(`Document not found: ${result.ref}`); return null; } - + // Calculate additional relevance boost for title matches const titleBoost = this.calculateTitleBoost(doc, queryLower, queryTerms); const keywordBoost = this.calculateKeywordBoost(doc, queryTerms); const descriptionBoost = this.calculateDescriptionBoost(doc, queryTerms); - + // Apply boosts to base score const enhancedScore = result.score * (1 + titleBoost + keywordBoost + descriptionBoost); - + return { ...doc, score: enhancedScore, @@ -574,17 +574,17 @@ class SearchEngine { }; }).filter(Boolean); // Remove null results } - + /** * Calculate boost for title matches * Heavily rewards exact and partial title matches */ calculateTitleBoost(doc, queryLower, queryTerms) { if (!doc.title) return 0; - + const titleLower = doc.title.toLowerCase(); let boost = 0; - + // Exact title match (highest boost) if (titleLower === queryLower) { boost += 10; @@ -610,46 +610,46 @@ class SearchEngine { boost += 2 * (matchingTerms.length / queryTerms.length); } } - + // Additional boost if title contains query as a distinct word const titleWords = titleLower.split(/[\s\-_:]+/); if (titleWords.some(word => word === queryLower || word.startsWith(queryLower))) { boost += 2; } - + return boost; } - + /** * Calculate boost for keyword matches */ calculateKeywordBoost(doc, queryTerms) { if (!doc.keywords) return 0; - - const keywords = Array.isArray(doc.keywords) + + const keywords = Array.isArray(doc.keywords) ? 
doc.keywords.map(k => k.toLowerCase()) : doc.keywords.toLowerCase().split(/[\s,]+/); - + let boost = 0; - + queryTerms.forEach(term => { if (keywords.some(kw => kw === term || kw.startsWith(term))) { boost += 1.5; } }); - + return boost; } - + /** * Calculate boost for description matches */ calculateDescriptionBoost(doc, queryTerms) { if (!doc.description) return 0; - + const descLower = doc.description.toLowerCase(); let boost = 0; - + // Check if query terms appear early in description queryTerms.forEach(term => { const pos = descLower.indexOf(term); @@ -658,23 +658,23 @@ class SearchEngine { boost += pos < 50 ? 1 : 0.5; } }); - + return boost; } - + /** * Group results by document and find matching sections */ groupResultsByDocument(results, query) { const grouped = new Map(); - + results.forEach(result => { const docId = result.id; - + if (!grouped.has(docId)) { // Find matching sections within this document const matchingSections = this.findMatchingSections(result, query); - + grouped.set(docId, { ...result, matchingSections, @@ -685,30 +685,30 @@ class SearchEngine { // Document already exists, combine scores and sections const existing = grouped.get(docId); const additionalSections = this.findMatchingSections(result, query); - + existing.matchingSections = this.mergeSections(existing.matchingSections, additionalSections); existing.totalMatches += 1; existing.combinedScore = Math.max(existing.combinedScore, result.score); } }); - + // Convert map to array and sort by combined score return Array.from(grouped.values()) .sort((a, b) => b.combinedScore - a.combinedScore); } - + /** * Find matching sections within a document */ findMatchingSections(result, query) { const matchingSections = []; const queryTerms = query.toLowerCase().split(/\s+/); - + // Check if title matches if (result.title) { const titleText = result.title.toLowerCase(); const hasMatch = queryTerms.some(term => titleText.includes(term)); - + if (hasMatch) { matchingSections.push({ type: 
'title', @@ -718,13 +718,13 @@ class SearchEngine { }); } } - + // Check headings for matches if (result.headings && Array.isArray(result.headings)) { result.headings.forEach(heading => { const headingText = heading.text?.toLowerCase() || ''; const hasMatch = queryTerms.some(term => headingText.includes(term)); - + if (hasMatch) { matchingSections.push({ type: 'heading', @@ -735,7 +735,7 @@ class SearchEngine { } }); } - + // If no specific sections found, add a general content match if (matchingSections.length === 0) { matchingSections.push({ @@ -745,43 +745,43 @@ class SearchEngine { anchor: '' }); } - + return matchingSections; } - + /** * Generate anchor link similar to how Sphinx does it */ generateAnchor(headingText) { if (!headingText) return ''; - + return headingText .toLowerCase() .replace(/[^\w\s-]/g, '') // Remove special chars .replace(/\s+/g, '-') // Replace spaces with hyphens .trim(); } - + /** * Merge sections, avoiding duplicates */ mergeSections(existing, additional) { const merged = [...existing]; - + additional.forEach(section => { - const isDuplicate = existing.some(existingSection => - existingSection.text === section.text && + const isDuplicate = existing.some(existingSection => + existingSection.text === section.text && existingSection.type === section.type ); - + if (!isDuplicate) { merged.push(section); } }); - + return merged; } - + /** * Get search statistics */ @@ -791,7 +791,7 @@ class SearchEngine { Object.entries(this.facets).forEach(([key, valueSet]) => { facetStats[key] = valueSet.size; }); - + return { documentsIndexed: Object.keys(this.documents).length, topicsAvailable: this.topics.size, @@ -804,7 +804,7 @@ class SearchEngine { isInitialized: this.isInitialized }; } - + /** * Check if the search engine is ready */ @@ -814,4 +814,4 @@ class SearchEngine { } // Make SearchEngine available globally -window.SearchEngine = SearchEngine; \ No newline at end of file +window.SearchEngine = SearchEngine; diff --git 
a/docs/_extensions/search_assets/modules/SearchInterface.js b/docs/_extensions/search_assets/modules/SearchInterface.js index 569280b818..a0d793cbdb 100644 --- a/docs/_extensions/search_assets/modules/SearchInterface.js +++ b/docs/_extensions/search_assets/modules/SearchInterface.js @@ -12,7 +12,7 @@ class SearchInterface { this.resultsContainer = null; this.statsContainer = null; } - + /** * Create the search interface elements */ @@ -27,12 +27,12 @@ class SearchInterface { } console.log('✅ Search interface created'); } - + /** * Check if we're on the search page */ isSearchPage() { - return window.location.pathname.includes('/search') || + return window.location.pathname.includes('/search') || window.location.pathname.includes('/search.html') || window.location.pathname.endsWith('search/') || document.querySelector('#search-results') !== null || @@ -41,7 +41,7 @@ class SearchInterface { document.title.toLowerCase().includes('search') || document.querySelector('h1')?.textContent.toLowerCase().includes('search'); } - + /** * Enhance the existing search page using the template structure */ @@ -49,51 +49,51 @@ class SearchInterface { console.log('🔍 Enhancing search page using existing template...'); console.log('📄 Page URL:', window.location.href); console.log('📋 Page title:', document.title); - + // Use the template's existing elements this.input = document.querySelector('#enhanced-search-page-input'); this.resultsContainer = document.querySelector('#enhanced-search-page-results'); - + console.log('🔎 Template search input found:', !!this.input); console.log('📦 Template results container found:', !!this.resultsContainer); - + if (this.input && this.resultsContainer) { console.log('✅ Using existing template structure - no additional setup needed'); // The template's JavaScript will handle everything return; } - + // Fallback for non-template pages console.log('⚠️ Template elements not found, falling back to generic search page detection'); 
this.fallbackToGenericSearchPage(); } - + /** * Fallback for pages that don't use the template */ fallbackToGenericSearchPage() { // Find existing search elements on generic pages - this.input = document.querySelector('#searchbox input[type="text"]') || + this.input = document.querySelector('#searchbox input[type="text"]') || document.querySelector('input[name="q"]') || document.querySelector('.search input[type="text"]'); - + // Find or create results container this.resultsContainer = document.querySelector('#search-results') || document.querySelector('.search-results') || this.createResultsContainer(); - + // Create stats container this.statsContainer = this.createStatsContainer(); - + // Hide default Sphinx search results if they exist this.hideDefaultResults(); - + // Initialize with empty state this.showEmptyState(); - + console.log('✅ Generic search page enhanced'); } - + /** * Create results container if it doesn't exist */ @@ -101,7 +101,7 @@ class SearchInterface { const container = document.createElement('div'); container.id = 'enhanced-search-results'; container.className = 'enhanced-search-results'; - + // Add basic styling to ensure proper positioning container.style.cssText = ` width: 100%; @@ -111,10 +111,10 @@ class SearchInterface { position: relative; z-index: 1; `; - + // Find the best place to insert it within the main content area const insertLocation = this.findBestInsertLocation(); - + if (insertLocation.parent && insertLocation.method === 'append') { insertLocation.parent.appendChild(container); console.log(`✅ Results container added to: ${insertLocation.parent.className || insertLocation.parent.tagName}`); @@ -125,10 +125,10 @@ class SearchInterface { // Last resort - create a wrapper in main content this.createInMainContent(container); } - + return container; } - + /** * Find the best location to insert search results */ @@ -138,13 +138,13 @@ class SearchInterface { if (searchResults) { return { parent: searchResults, method: 'append' }; } 
- + // Look for search form and place results after it let searchForm = document.querySelector('#searchbox, .search form, form[action*="search"]'); if (searchForm) { return { parent: searchForm, method: 'after' }; } - + // Look for main content containers (common Sphinx/theme classes) const mainSelectors = [ '.document .body', @@ -157,23 +157,23 @@ class SearchInterface { '.rst-content', '.body-content' ]; - + for (const selector of mainSelectors) { const element = document.querySelector(selector); if (element) { return { parent: element, method: 'append' }; } } - + // Try to find any container that's not the body const anyContainer = document.querySelector('.container, .wrapper, .page, #content'); if (anyContainer) { return { parent: anyContainer, method: 'append' }; } - + return { parent: null, method: null }; } - + /** * Create container in main content as last resort */ @@ -186,22 +186,22 @@ class SearchInterface { margin: 2rem auto; padding: 0 1rem; `; - + // Add a title const title = document.createElement('h1'); title.textContent = 'Search Results'; title.style.cssText = 'margin-bottom: 1rem;'; wrapper.appendChild(title); - + // Add the container wrapper.appendChild(container); - + // Insert into body, but with proper styling document.body.appendChild(wrapper); - + console.log('⚠️ Created search results in body with wrapper - consider improving page structure'); } - + /** * Create stats container */ @@ -209,15 +209,15 @@ class SearchInterface { const container = document.createElement('div'); container.className = 'enhanced-search-stats'; container.style.cssText = 'margin: 1rem 0; font-size: 0.9rem; color: #666;'; - + // Insert before results if (this.resultsContainer && this.resultsContainer.parentNode) { this.resultsContainer.parentNode.insertBefore(container, this.resultsContainer); } - + return container; } - + /** * Hide default Sphinx search results */ @@ -230,7 +230,7 @@ class SearchInterface { el.style.display = 'none'; }); } - + /** * Create the 
main search modal (legacy - kept for compatibility) */ @@ -245,8 +245,8 @@ class SearchInterface {
-
`; - + document.body.appendChild(modal); - + // Cache references this.modal = modal; this.input = modal.querySelector('#enhanced-search-input'); this.resultsContainer = modal.querySelector('.enhanced-search-results'); this.statsContainer = modal.querySelector('.enhanced-search-stats'); - + // Add event handlers for closing the modal const closeButton = modal.querySelector('.enhanced-search-close'); const backdrop = modal.querySelector('.enhanced-search-backdrop'); - + if (closeButton) { closeButton.addEventListener('click', () => this.hideModal()); } - + if (backdrop) { backdrop.addEventListener('click', () => this.hideModal()); } - + // Hide modal by default modal.style.display = 'none'; - + // Initialize with empty state this.showEmptyState(); } - + /** * Replace or enhance existing search button to show modal */ @@ -304,7 +304,7 @@ class SearchInterface { const searchForm = document.querySelector('#searchbox form') || document.querySelector('.search form') || document.querySelector('form[action*="search"]'); - + if (searchForm) { // Prevent form submission and show modal instead searchForm.addEventListener('submit', (e) => { @@ -313,7 +313,7 @@ class SearchInterface { }); console.log('✅ Search form enhanced to show modal'); } - + // Find search button specifically and enhance it const existingButton = document.querySelector('.search-button-field, .search-button__button'); if (existingButton) { @@ -323,7 +323,7 @@ class SearchInterface { }); console.log('✅ Search button enhanced to show modal'); } - + // Also look for search input fields and enhance them const searchInput = document.querySelector('#searchbox input[type="text"]') || document.querySelector('.search input[type="text"]'); @@ -334,7 +334,7 @@ class SearchInterface { console.log('✅ Search input enhanced to show modal on focus'); } } - + /** * Show the search interface (focus input or show modal) */ @@ -346,7 +346,7 @@ class SearchInterface { this.input.select(); } } - + /** * Hide the search interface 
(hide modal or blur input) */ @@ -357,7 +357,7 @@ class SearchInterface { this.input.blur(); } } - + /** * Show the modal */ @@ -376,7 +376,7 @@ class SearchInterface { console.log('🔍 Search modal shown'); } } - + /** * Hide the modal */ @@ -395,42 +395,42 @@ class SearchInterface { console.log('🔍 Search modal hidden'); } } - + /** * Get the search input element */ getInput() { return this.input; } - + /** * Get the results container */ getResultsContainer() { return this.resultsContainer; } - + /** * Get the stats container */ getStatsContainer() { return this.statsContainer; } - + /** * Get the modal element */ getModal() { return this.modal; } - + /** * Check if modal is visible */ isModalVisible() { return this.isVisible && this.modal && this.modal.style.display !== 'none'; } - + /** * Show empty state in results */ @@ -452,7 +452,7 @@ class SearchInterface { `; } } - + /** * Show no results state */ @@ -474,7 +474,7 @@ class SearchInterface { `; } } - + /** * Show error state */ @@ -488,7 +488,7 @@ class SearchInterface { `; } } - + /** * Update search statistics */ @@ -501,7 +501,7 @@ class SearchInterface { } } } - + /** * Clear search statistics */ @@ -510,14 +510,14 @@ class SearchInterface { this.statsContainer.innerHTML = ''; } } - + /** * Get current search query */ getQuery() { return this.input ? this.input.value.trim() : ''; } - + /** * Set search query */ @@ -526,7 +526,7 @@ class SearchInterface { this.input.value = query; } } - + /** * Clear search query */ @@ -535,7 +535,7 @@ class SearchInterface { this.input.value = ''; } } - + /** * Focus the search input */ @@ -544,21 +544,21 @@ class SearchInterface { this.input.focus(); } } - + /** * Get close button for event binding */ getCloseButton() { return this.modal ? this.modal.querySelector('.enhanced-search-close') : null; } - + /** * Get backdrop for event binding */ getBackdrop() { return this.modal ? 
this.modal.querySelector('.enhanced-search-backdrop') : null; } - + /** * Escape HTML to prevent XSS */ @@ -570,7 +570,7 @@ class SearchInterface { .replace(/"/g, """) .replace(/'/g, "'"); } - + /** * Add CSS class to modal */ @@ -579,7 +579,7 @@ class SearchInterface { this.modal.classList.add(className); } } - + /** * Remove CSS class from modal */ @@ -588,14 +588,14 @@ class SearchInterface { this.modal.classList.remove(className); } } - + /** * Check if modal has class */ hasModalClass(className) { return this.modal ? this.modal.classList.contains(className) : false; } - + /** * Destroy the search interface */ @@ -612,4 +612,4 @@ class SearchInterface { } // Make SearchInterface available globally -window.SearchInterface = SearchInterface; \ No newline at end of file +window.SearchInterface = SearchInterface; diff --git a/docs/_extensions/search_assets/modules/SearchPageManager.js b/docs/_extensions/search_assets/modules/SearchPageManager.js index 6122fcc2af..7225f0fba1 100644 --- a/docs/_extensions/search_assets/modules/SearchPageManager.js +++ b/docs/_extensions/search_assets/modules/SearchPageManager.js @@ -173,7 +173,7 @@ class SearchPageManager {
- +
- +
- + ${facetFilters} `; @@ -731,8 +731,8 @@ class SearchPageManager { if (!topics || topics.length === 0) return ''; const topicBadges = topics.slice(0, 3).map(topic => - ` 📁 ${this.escapeHtml(topic)} @@ -791,8 +791,8 @@ class SearchPageManager { : ''; return ` -
{ if (term.length > 1) { const regex = new RegExp(`(${this.escapeRegex(term)})`, 'gi'); highlighted = highlighted.replace(regex, `$1`); } }); - + return highlighted; } - + /** * Generate breadcrumb from document ID */ @@ -68,7 +68,7 @@ class Utils { const parts = docId.split('/').filter(part => part && part !== 'index'); return parts.length > 0 ? parts.join(' › ') : 'Home'; } - + /** * Generate anchor link from heading text (Sphinx-style) */ @@ -79,7 +79,7 @@ class Utils { .replace(/\s+/g, '-') // Replace spaces with hyphens .trim(); } - + /** * Get document URL from result object */ @@ -89,7 +89,7 @@ class Utils { } return `${result.id.replace(/^\/+/, '')}.html`; } - + /** * Get appropriate icon for section type */ @@ -107,7 +107,7 @@ class Utils { return ''; } } - + /** * Load external script (like Lunr.js) */ @@ -120,7 +120,7 @@ class Utils { document.head.appendChild(script); }); } - + /** * Safe substring with fallback */ @@ -128,14 +128,14 @@ class Utils { if (!str) return fallback; return str.length > maxLength ? str.substring(0, maxLength) : str; } - + /** * Check if string is valid and not empty */ isValidString(str) { return typeof str === 'string' && str.trim().length > 0; } - + /** * Safe array access with fallback */ @@ -145,4 +145,4 @@ class Utils { } // Make Utils available globally -window.Utils = Utils; \ No newline at end of file +window.Utils = Utils; diff --git a/docs/_extensions/search_assets/templates/search.html b/docs/_extensions/search_assets/templates/search.html index ed4a5b4e4d..7f3ba378a7 100644 --- a/docs/_extensions/search_assets/templates/search.html +++ b/docs/_extensions/search_assets/templates/search.html @@ -46,4 +46,4 @@

Search Documentation

{% block scripts -%} {{ super() }} {# Search page script is loaded via html_js_files in conf.py #} -{%- endblock scripts %} \ No newline at end of file +{%- endblock scripts %}