diff --git a/sources/core/codeguard-0-additional-cryptography.md b/sources/core/codeguard-0-additional-cryptography.md index 6bcb4fd..db5edae 100644 --- a/sources/core/codeguard-0-additional-cryptography.md +++ b/sources/core/codeguard-0-additional-cryptography.md @@ -14,6 +14,9 @@ languages: - typescript - xml - yaml +tags: +- data-security +- secrets alwaysApply: false --- diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md index 94a51fc..2c78e45 100644 --- a/sources/core/codeguard-0-api-web-services.md +++ b/sources/core/codeguard-0-api-web-services.md @@ -12,6 +12,8 @@ languages: - typescript - xml - yaml +tags: +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md index 2be26cc..580af4e 100644 --- a/sources/core/codeguard-0-authentication-mfa.md +++ b/sources/core/codeguard-0-authentication-mfa.md @@ -13,6 +13,9 @@ languages: - ruby - swift - typescript +tags: +- authentication +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-client-side-web-security.md b/sources/core/codeguard-0-client-side-web-security.md index 17a4e89..c2b0c68 100644 --- a/sources/core/codeguard-0-client-side-web-security.md +++ b/sources/core/codeguard-0-client-side-web-security.md @@ -8,6 +8,8 @@ languages: - php - typescript - vlang +tags: +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md index ec2e982..828edd9 100644 --- a/sources/core/codeguard-0-cloud-orchestration-kubernetes.md +++ b/sources/core/codeguard-0-cloud-orchestration-kubernetes.md @@ -4,6 +4,8 @@ description: Kubernetes hardening (RBAC, admission policies, network policies, s languages: - javascript - yaml +tags: +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-data-storage.md b/sources/core/codeguard-0-data-storage.md index 
6bd68f5..e01057b 100644 --- a/sources/core/codeguard-0-data-storage.md +++ b/sources/core/codeguard-0-data-storage.md @@ -6,6 +6,9 @@ languages: - javascript - sql - yaml +tags: +- data-security +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-devops-ci-cd-containers.md b/sources/core/codeguard-0-devops-ci-cd-containers.md index 1db3562..52bb26c 100644 --- a/sources/core/codeguard-0-devops-ci-cd-containers.md +++ b/sources/core/codeguard-0-devops-ci-cd-containers.md @@ -8,6 +8,8 @@ languages: - shell - xml - yaml +tags: +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-iac-security.md b/sources/core/codeguard-0-iac-security.md index 0785120..17fe6de 100644 --- a/sources/core/codeguard-0-iac-security.md +++ b/sources/core/codeguard-0-iac-security.md @@ -8,6 +8,8 @@ languages: - ruby - shell - yaml +tags: +- infrastructure alwaysApply: false --- diff --git a/sources/core/codeguard-0-input-validation-injection.md b/sources/core/codeguard-0-input-validation-injection.md index 9ae2ab1..fc15368 100644 --- a/sources/core/codeguard-0-input-validation-injection.md +++ b/sources/core/codeguard-0-input-validation-injection.md @@ -14,6 +14,8 @@ languages: - shell - sql - typescript +tags: +- web alwaysApply: false --- diff --git a/sources/core/codeguard-0-logging.md b/sources/core/codeguard-0-logging.md index 659be01..2a354aa 100644 --- a/sources/core/codeguard-0-logging.md +++ b/sources/core/codeguard-0-logging.md @@ -5,6 +5,8 @@ languages: - c - javascript - yaml +tags: +- privacy alwaysApply: false --- diff --git a/sources/core/codeguard-0-privacy-data-protection.md b/sources/core/codeguard-0-privacy-data-protection.md index f28876d..22f522d 100644 --- a/sources/core/codeguard-0-privacy-data-protection.md +++ b/sources/core/codeguard-0-privacy-data-protection.md @@ -5,6 +5,8 @@ languages: - javascript - matlab - yaml +tags: +- privacy alwaysApply: false --- diff --git 
def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
    """
    Check if rule has all required tags (AND logic, case-insensitive).

    Callers are expected to pass lowercase tags, but the comparison is
    normalized here as well so that programmatic callers that skip the CLI
    normalization step still get the documented case-insensitive behavior
    instead of silently filtering out every rule.

    Args:
        rule_tags: List of tags from the rule. ``None`` or an empty list is
            treated as "rule has no tags".
        filter_tags: List of tags to filter by. ``None`` or an empty list
            means no filter is active.

    Returns:
        True if rule has all filter tags (or no filter), False otherwise
    """
    if not filter_tags:
        return True  # No filter means all pass

    # Build the lookup set once: O(1) membership per required tag and a
    # single place where case is normalized.
    available = {tag.lower() for tag in rule_tags or ()}
    return all(tag.lower() in available for tag in filter_tags)
@@ -90,6 +107,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: output_dir: Output directory (default: 'dist/') include_claudecode: Whether to generate Claude Code plugin (default: True, only for core rules) version: Version string to use (default: read from pyproject.toml) + filter_tags: Optional list of tags to filter by (AND logic, case-insensitive) Returns: Dictionary with 'success' and 'errors' lists: @@ -138,7 +156,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: # Setup output directory output_base = Path(output_dir) - results = {"success": [], "errors": []} + results = {"success": [], "errors": [], "skipped": []} language_to_rules = defaultdict(list) # Process each file @@ -146,6 +164,11 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: try: # Convert the file (raises exceptions on error) result = converter.convert(md_file) + + # Apply tag filter if specified + if filter_tags and not matches_tag_filter(result.tags, filter_tags): + results["skipped"].append(result.filename) + continue # Write each format output_files = [] @@ -192,9 +215,14 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: results["errors"].append(error_msg) # Summary - print( - f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors" - ) + if filter_tags: + print( + f"\nResults: {len(results['success'])} success, {len(results['skipped'])} skipped (tag filter), {len(results['errors'])} errors" + ) + else: + print( + f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors" + ) # Generate SKILL.md with language mappings (only if Claude Code is included) if include_claudecode and language_to_rules: @@ -256,6 +284,12 @@ def _resolve_source_paths(args) -> list[Path]: default="dist", help="Output directory for generated bundles (default: dist).", ) + parser.add_argument( + "--tag", + "--tags", + dest="tags", + 
help="Filter rules by tags (comma-separated, case-insensitive, AND logic). Example: --tag api,web-security", + ) cli_args = parser.parse_args() source_paths = _resolve_source_paths(cli_args) @@ -316,7 +350,16 @@ def _resolve_source_paths(args) -> list[Path]: print() # Convert all sources - aggregated = {"success": [], "errors": []} + aggregated = {"success": [], "errors": [], "skipped": []} + # Parse comma-separated tags and normalize to lowercase + filter_tags = None + if cli_args.tags: + filter_tags = [tag.strip().lower() for tag in cli_args.tags.split(",") if tag.strip()] + + # Print tag filter info if active + if filter_tags: + print(f"Tag filter active: {', '.join(filter_tags)} (AND logic - rules must have all tags)\n") + for source_path in source_paths: is_core = source_path == Path("sources/core") @@ -325,11 +368,14 @@ def _resolve_source_paths(args) -> list[Path]: str(source_path), cli_args.output_dir, include_claudecode=is_core, - version=version + version=version, + filter_tags=filter_tags ) aggregated["success"].extend(results["success"]) aggregated["errors"].extend(results["errors"]) + if "skipped" in results: + aggregated["skipped"].extend(results["skipped"]) print("") if aggregated["errors"]: diff --git a/src/converter.py b/src/converter.py index 39f4fce..4a71a94 100644 --- a/src/converter.py +++ b/src/converter.py @@ -12,7 +12,7 @@ from pathlib import Path from language_mappings import languages_to_globs -from utils import parse_frontmatter_and_content +from utils import parse_frontmatter_and_content, validate_tags from formats import ( BaseFormat, ProcessedRule, @@ -45,6 +45,7 @@ class ConversionResult: basename: Filename without extension (e.g., 'my-rule') outputs: Dictionary mapping format names to their outputs languages: List of programming languages the rule applies to, empty list if always applies + tags: List of tags for categorizing and filtering rules Example: result = ConversionResult( filename="my-rule.md", @@ -56,7 +57,8 @@ class 
ConversionResult: subpath=".cursor/rules" ) }, - languages=["python", "javascript"] + languages=["python", "javascript"], + tags=["authentication", "web-security"] ) """ @@ -64,6 +66,7 @@ class ConversionResult: basename: str outputs: dict[str, FormatOutput] languages: list[str] + tags: list[str] class RuleConverter: @@ -159,6 +162,11 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule: f"'languages' must be a non-empty list in {filename} when alwaysApply is false" ) + # Parse and validate tags (optional field) + tags = [] + if "tags" in frontmatter: + tags = validate_tags(frontmatter["tags"], filename) + # Adding rule_id to the beginning of the content rule_id = Path(filename).stem markdown_content = f"rule_id: {rule_id}\n\n{markdown_content}" @@ -169,6 +177,7 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule: always_apply=always_apply, content=markdown_content, filename=filename, + tags=tags, ) def generate_globs(self, languages: list[str]) -> str: @@ -242,4 +251,5 @@ def convert(self, filepath: str) -> ConversionResult: basename=basename, outputs=outputs, languages=rule.languages, + tags=rule.tags, ) diff --git a/src/formats/base.py b/src/formats/base.py index 5af8732..65c75e5 100644 --- a/src/formats/base.py +++ b/src/formats/base.py @@ -25,6 +25,7 @@ class ProcessedRule: always_apply: Whether this rule should apply to all files content: The actual rule content in markdown format filename: Original filename of the rule + tags: List of tags for categorizing and filtering rules """ description: str @@ -32,6 +33,7 @@ class ProcessedRule: always_apply: bool content: str filename: str + tags: list[str] class BaseFormat(ABC): diff --git a/src/tag_mappings.py b/src/tag_mappings.py new file mode 100644 index 0000000..304992b --- /dev/null +++ b/src/tag_mappings.py @@ -0,0 +1,21 @@ +# Copyright 2025 Cisco Systems, Inc. 
def validate_tags(tags, filename=None) -> list[str]:
    """
    Check a frontmatter 'tags' value and return it in canonical form.

    Args:
        tags: The raw tags value; must be a non-empty list of strings
        filename: Optional filename appended to error messages

    Returns:
        Lowercased tags in order of first appearance, with later duplicates
        (compared after lowercasing) dropped.

    Raises:
        ValueError: If 'tags' is not a list, is an empty list, or contains an
            entry that is not a string, contains whitespace, or is empty.

    Note:
        An empty list (tags: []) is rejected on purpose: rules without tags
        should leave the 'tags' field out of the frontmatter altogether.
    """
    where = f" in {filename}" if filename else ""

    if not isinstance(tags, list):
        raise ValueError(f"'tags' must be a list{where}")
    if not tags:
        raise ValueError(f"'tags' list cannot be empty{where}. Omit the field if you have no tags.")

    # A dict keeps insertion order, so it doubles as an ordered set here.
    unique = {}
    for entry in tags:
        if not isinstance(entry, str):
            raise ValueError(f"All tags must be strings{where}, found: {type(entry).__name__}")
        if any(map(str.isspace, entry)):
            raise ValueError(f"Tags cannot contain whitespace: '{entry}'{where}")
        if len(entry) == 0:
            raise ValueError(f"Empty tag found{where}")
        unique.setdefault(entry.lower(), None)

    return list(unique)