From caf91bdc0e16c0e2c643b1c5c8f681e2bc2789b8 Mon Sep 17 00:00:00 2001 From: Keyvanhardani Date: Sat, 10 Jan 2026 16:04:20 +0100 Subject: [PATCH] feat: Add German umlaut validation for Write/Edit operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new plugin and hook example for validating German text content to ensure correct umlaut usage (ä, ö, ü, ß). Problem: When working with German language projects, incorrect umlaut representations are frequently written (e.g., 'fuer' instead of 'für', 'ueber' instead of 'über'). This causes issues in production code, documentation, and user-facing content. Solution: - New plugin: german-language-validator - New hook example: german_umlaut_validator_example.py - Validates 35+ common German words that must have umlauts - Blocks Write/Edit operations with clear warnings when errors found - Supports common file types (.md, .tsx, .py, .json, etc.) Closes #17324 --- .../hooks/german_umlaut_validator_example.py | 194 ++++++++++++++++++ .../.claude-plugin/plugin.json | 10 + plugins/german-language-validator/README.md | 81 ++++++++ .../hooks/hooks.json | 24 +++ .../hooks/umlaut_validator.py | 118 +++++++++++ 5 files changed, 427 insertions(+) create mode 100644 examples/hooks/german_umlaut_validator_example.py create mode 100644 plugins/german-language-validator/.claude-plugin/plugin.json create mode 100644 plugins/german-language-validator/README.md create mode 100644 plugins/german-language-validator/hooks/hooks.json create mode 100644 plugins/german-language-validator/hooks/umlaut_validator.py diff --git a/examples/hooks/german_umlaut_validator_example.py b/examples/hooks/german_umlaut_validator_example.py new file mode 100644 index 0000000000..1e85acfdf0 --- /dev/null +++ b/examples/hooks/german_umlaut_validator_example.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +""" +Claude Code Hook: German Umlaut Validator +========================================== +This hook runs as a 
PreToolUse hook for Write and Edit tools. +It validates German text content for common umlaut mistakes like +'fuer' instead of 'für', 'ueber' instead of 'über', etc. + +Read more about hooks here: https://docs.anthropic.com/en/docs/claude-code/hooks + +Configuration example for settings.json: + +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Write", + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/claude-code/examples/hooks/german_umlaut_validator_example.py" + } + ] + }, + { + "matcher": "Edit", + "hooks": [ + { + "type": "command", + "command": "python3 /path/to/claude-code/examples/hooks/german_umlaut_validator_example.py" + } + ] + } + ] + } +} + +This helps prevent common mistakes when working with German language content, +which is especially important for: +- User-facing text in applications +- Documentation and README files +- Error messages and validation text +- Database content and exports +""" + +import json +import re +import sys +from typing import List, Tuple + +# Common German words that MUST have umlauts +# Format: (wrong_pattern, correct_form, description) +UMLAUT_RULES: List[Tuple[re.Pattern, str, str]] = [ + # für/über family + (re.compile(r'\bfuer\b', re.IGNORECASE), 'für', 'fuer → für'), + (re.compile(r'\bueber\b', re.IGNORECASE), 'über', 'ueber → über'), + + # können/müssen/würden family (modal verbs) + (re.compile(r'\bkoennen\b', re.IGNORECASE), 'können', 'koennen → können'), + (re.compile(r'\bkoennte\b', re.IGNORECASE), 'könnte', 'koennte → könnte'), + (re.compile(r'\bmuessen\b', re.IGNORECASE), 'müssen', 'muessen → müssen'), + (re.compile(r'\bmuesste\b', re.IGNORECASE), 'müsste', 'muesste → müsste'), + (re.compile(r'\bwuerden\b', re.IGNORECASE), 'würden', 'wuerden → würden'), + (re.compile(r'\bwuerde\b', re.IGNORECASE), 'würde', 'wuerde → würde'), + + # hätte/wäre family (subjunctive) + (re.compile(r'\bhaette\b', re.IGNORECASE), 'hätte', 'haette → hätte'), + (re.compile(r'\bwaere\b', re.IGNORECASE), 'wäre', 'waere → 
wäre'), + + # möchte/mögen family + (re.compile(r'\bmoechte\b', re.IGNORECASE), 'möchte', 'moechte → möchte'), + (re.compile(r'\bmoegen\b', re.IGNORECASE), 'mögen', 'moegen → mögen'), + + # Größe/große family + (re.compile(r'\bgroesse\b', re.IGNORECASE), 'Größe', 'groesse → Größe'), + (re.compile(r'\bgrosse\b', re.IGNORECASE), 'große', 'grosse → große'), + (re.compile(r'\bgroesste\b', re.IGNORECASE), 'größte', 'groesste → größte'), + + # ß words + (re.compile(r'\bstrasse\b', re.IGNORECASE), 'Straße', 'strasse → Straße'), + (re.compile(r'\bschliessen\b', re.IGNORECASE), 'schließen', 'schliessen → schließen'), + (re.compile(r'\bheissen\b', re.IGNORECASE), 'heißen', 'heissen → heißen'), + (re.compile(r'\bweiss\b', re.IGNORECASE), 'weiß', 'weiss → weiß'), + + # ä words + (re.compile(r'\baehnlich\b', re.IGNORECASE), 'ähnlich', 'aehnlich → ähnlich'), + (re.compile(r'\baendern\b', re.IGNORECASE), 'ändern', 'aendern → ändern'), + (re.compile(r'\baenderung\b', re.IGNORECASE), 'Änderung', 'aenderung → Änderung'), + (re.compile(r'\bspaeter\b', re.IGNORECASE), 'später', 'spaeter → später'), + (re.compile(r'\bnaechste\b', re.IGNORECASE), 'nächste', 'naechste → nächste'), + (re.compile(r'\bwaehrend\b', re.IGNORECASE), 'während', 'waehrend → während'), + (re.compile(r'\berklaeren\b', re.IGNORECASE), 'erklären', 'erklaeren → erklären'), + (re.compile(r'\bbestaetigen\b', re.IGNORECASE), 'bestätigen', 'bestaetigen → bestätigen'), + + # ö words + (re.compile(r'\boeffnen\b', re.IGNORECASE), 'öffnen', 'oeffnen → öffnen'), + (re.compile(r'\boeffentlich\b', re.IGNORECASE), 'öffentlich', 'oeffentlich → öffentlich'), + (re.compile(r'\bloesung\b', re.IGNORECASE), 'Lösung', 'loesung → Lösung'), + (re.compile(r'\bloeschen\b', re.IGNORECASE), 'löschen', 'loeschen → löschen'), + (re.compile(r'\bhoechste\b', re.IGNORECASE), 'höchste', 'hoechste → höchste'), + (re.compile(r'\bmoeglichkeit\b', re.IGNORECASE), 'Möglichkeit', 'moeglichkeit → Möglichkeit'), + (re.compile(r'\bveroeffentlichen\b', 
re.IGNORECASE), 'veröffentlichen', 'veroeffentlichen → veröffentlichen'), + + # ü words + (re.compile(r'\bpruefung\b', re.IGNORECASE), 'Prüfung', 'pruefung → Prüfung'), + (re.compile(r'\bpruefen\b', re.IGNORECASE), 'prüfen', 'pruefen → prüfen'), + (re.compile(r'\bzurueck\b', re.IGNORECASE), 'zurück', 'zurueck → zurück'), + (re.compile(r'\bnatuerlich\b', re.IGNORECASE), 'natürlich', 'natuerlich → natürlich'), + (re.compile(r'\bverfuegbar\b', re.IGNORECASE), 'verfügbar', 'verfuegbar → verfügbar'), + (re.compile(r'\bdurchfuehren\b', re.IGNORECASE), 'durchführen', 'durchfuehren → durchführen'), + (re.compile(r'\bhinzufuegen\b', re.IGNORECASE), 'hinzufügen', 'hinzufuegen → hinzufügen'), + (re.compile(r'\bgueltig\b', re.IGNORECASE), 'gültig', 'gueltig → gültig'), + (re.compile(r'\bungueltig\b', re.IGNORECASE), 'ungültig', 'ungueltig → ungültig'), + (re.compile(r'\bausfuehren\b', re.IGNORECASE), 'ausführen', 'ausfuehren → ausführen'), + (re.compile(r'\bfuehren\b', re.IGNORECASE), 'führen', 'fuehren → führen'), +] + +# File extensions that should be checked (files likely to contain German text) +GERMAN_FILE_EXTENSIONS = { + '.md', '.txt', '.html', '.htm', '.tsx', '.ts', '.jsx', '.js', + '.json', '.py', '.yml', '.yaml', '.xml', '.vue', '.svelte' +} + + +def should_check_file(file_path: str) -> bool: + """Check if file should be validated for German umlauts.""" + if not file_path: + return False + + # Get file extension + dot_index = file_path.rfind('.') + if dot_index == -1: + return False + + ext = file_path[dot_index:].lower() + return ext in GERMAN_FILE_EXTENSIONS + + +def find_umlaut_errors(content: str) -> List[str]: + """Find all umlaut errors in content.""" + errors = [] + + for pattern, correct, description in UMLAUT_RULES: + matches = pattern.findall(content) + if matches: + count = len(matches) + errors.append(f"{description} ({count}x found)") + + return errors + + +def main(): + try: + input_data = json.load(sys.stdin) + except json.JSONDecodeError as e: + 
print(f"Error: Invalid JSON input: {e}", file=sys.stderr) + sys.exit(1) + + tool_name = input_data.get("tool_name", "") + + # Only check Write and Edit tools + if tool_name not in ("Write", "Edit"): + sys.exit(0) + + tool_input = input_data.get("tool_input", {}) + file_path = tool_input.get("file_path", "") + + # Get content based on tool type + content = tool_input.get("content", "") or tool_input.get("new_string", "") + + if not content: + sys.exit(0) + + # Only check files that might contain German text + if not should_check_file(file_path): + sys.exit(0) + + # Find umlaut errors + errors = find_umlaut_errors(content) + + if errors: + print(f"\n⚠️ GERMAN UMLAUT WARNING in {file_path}:", file=sys.stderr) + for error in errors: + print(f" • {error}", file=sys.stderr) + print("\nPlease use correct German umlauts (ä, ö, ü, ß)!", file=sys.stderr) + print("", file=sys.stderr) + + # Exit code 2 blocks the tool call and shows stderr to Claude + # Change to sys.exit(0) if you only want warnings without blocking + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/plugins/german-language-validator/.claude-plugin/plugin.json b/plugins/german-language-validator/.claude-plugin/plugin.json new file mode 100644 index 0000000000..9769d2b857 --- /dev/null +++ b/plugins/german-language-validator/.claude-plugin/plugin.json @@ -0,0 +1,10 @@ +{ + "name": "german-language-validator", + "version": "1.0.0", + "description": "Validates German text for correct umlaut usage (ä, ö, ü, ß) in Write/Edit operations", + "author": "Community", + "license": "MIT", + "keywords": ["german", "umlaut", "validation", "i18n", "localization"], + "repository": "https://github.com/anthropics/claude-code", + "hooks": "hooks/hooks.json" +} diff --git a/plugins/german-language-validator/README.md b/plugins/german-language-validator/README.md new file mode 100644 index 0000000000..f3be91b354 --- /dev/null +++ b/plugins/german-language-validator/README.md @@ -0,0 +1,81 @@ +# German Language 
Validator Plugin + +A Claude Code plugin that validates German text for correct umlaut usage (ä, ö, ü, ß) during Write and Edit operations. + +## Problem + +When working with German language content, AI assistants frequently write incorrect umlaut representations: +- `fuer` instead of `für` +- `ueber` instead of `über` +- `koennen` instead of `können` +- `groesse` instead of `Größe` +- `strasse` instead of `Straße` + +These errors can cause issues in production code, documentation, and user-facing content. + +## Solution + +This plugin automatically validates text content before it's written or edited, checking for common German umlaut mistakes and blocking the operation with a warning if errors are found. + +## Installation + +```bash +# Enable the plugin in your settings.json +{ + "enabledPlugins": { + "german-language-validator@anthropic": true + } +} +``` + +Or install manually by copying the plugin to your `.claude/plugins` directory. + +## Supported File Types + +The validator checks these file extensions: +- `.md`, `.txt` - Documentation +- `.html`, `.htm` - Web content +- `.tsx`, `.ts`, `.jsx`, `.js` - JavaScript/TypeScript +- `.json` - Configuration files +- `.py` - Python files +- `.yml`, `.yaml` - YAML files +- `.xml`, `.vue` - Other markup + +## Validated Words + +The plugin checks for 30+ common German words including: + +### Modal Verbs +- können, könnte, müssen, würden, würde + +### Subjunctive Forms +- hätte, wäre, möchte, mögen + +### Common Nouns & Adjectives +- Größe, große, Straße, Änderung, Lösung +- ähnlich, später, nächste, öffentlich +- natürlich, verfügbar, gültig, ungültig + +### Common Verbs +- ändern, öffnen, prüfen, löschen +- schließen, heißen, zurück + +## Customization + +To add more words to validate, edit `hooks/umlaut_validator.py` and add entries to the `UMLAUT_RULES` list: + +```python +(re.compile(r'\byourword\b', re.IGNORECASE), 'correct', 'yourword → correct'), +``` + +## Behavior + +- **Blocks** Write/Edit operations 
that contain umlaut errors +- Shows a warning message listing all found errors +- Does not modify content automatically (user must fix) + +To change to warning-only mode (don't block), change `sys.exit(2)` to `sys.exit(0)` in the validator. + +## License + +MIT diff --git a/plugins/german-language-validator/hooks/hooks.json b/plugins/german-language-validator/hooks/hooks.json new file mode 100644 index 0000000000..71c682bafd --- /dev/null +++ b/plugins/german-language-validator/hooks/hooks.json @@ -0,0 +1,24 @@ +{ + "PreToolUse": [ + { + "matcher": "Write", + "hooks": [ + { + "type": "command", + "command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/umlaut_validator.py", + "statusMessage": "Validating German umlauts..." + } + ] + }, + { + "matcher": "Edit", + "hooks": [ + { + "type": "command", + "command": "python3 ${CLAUDE_PLUGIN_ROOT}/hooks/umlaut_validator.py", + "statusMessage": "Validating German umlauts..." + } + ] + } + ] +} diff --git a/plugins/german-language-validator/hooks/umlaut_validator.py b/plugins/german-language-validator/hooks/umlaut_validator.py new file mode 100644 index 0000000000..cccf7ccac4 --- /dev/null +++ b/plugins/german-language-validator/hooks/umlaut_validator.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +German Umlaut Validator Hook +Validates German text for correct umlaut usage (ä, ö, ü, ß) +""" + +import json +import re +import sys +from typing import List, Tuple + +# Common German words that MUST have umlauts +UMLAUT_RULES: List[Tuple[re.Pattern, str, str]] = [ + # für/über family + (re.compile(r'\bfuer\b', re.IGNORECASE), 'für', 'fuer → für'), + (re.compile(r'\bueber\b', re.IGNORECASE), 'über', 'ueber → über'), + + # Modal verbs + (re.compile(r'\bkoennen\b', re.IGNORECASE), 'können', 'koennen → können'), + (re.compile(r'\bkoennte\b', re.IGNORECASE), 'könnte', 'koennte → könnte'), + (re.compile(r'\bmuessen\b', re.IGNORECASE), 'müssen', 'muessen → müssen'), + (re.compile(r'\bwuerden\b', re.IGNORECASE), 'würden', 'wuerden → würden'), + 
(re.compile(r'\bwuerde\b', re.IGNORECASE), 'würde', 'wuerde → würde'), + + # Subjunctive + (re.compile(r'\bhaette\b', re.IGNORECASE), 'hätte', 'haette → hätte'), + (re.compile(r'\bwaere\b', re.IGNORECASE), 'wäre', 'waere → wäre'), + + # möchte family + (re.compile(r'\bmoechte\b', re.IGNORECASE), 'möchte', 'moechte → möchte'), + (re.compile(r'\bmoegen\b', re.IGNORECASE), 'mögen', 'moegen → mögen'), + + # Größe family + (re.compile(r'\bgroesse\b', re.IGNORECASE), 'Größe', 'groesse → Größe'), + (re.compile(r'\bgrosse\b', re.IGNORECASE), 'große', 'grosse → große'), + + # ß words + (re.compile(r'\bstrasse\b', re.IGNORECASE), 'Straße', 'strasse → Straße'), + (re.compile(r'\bschliessen\b', re.IGNORECASE), 'schließen', 'schliessen → schließen'), + (re.compile(r'\bheissen\b', re.IGNORECASE), 'heißen', 'heissen → heißen'), + + # ä words + (re.compile(r'\baehnlich\b', re.IGNORECASE), 'ähnlich', 'aehnlich → ähnlich'), + (re.compile(r'\baendern\b', re.IGNORECASE), 'ändern', 'aendern → ändern'), + (re.compile(r'\baenderung\b', re.IGNORECASE), 'Änderung', 'aenderung → Änderung'), + (re.compile(r'\bspaeter\b', re.IGNORECASE), 'später', 'spaeter → später'), + (re.compile(r'\bnaechste\b', re.IGNORECASE), 'nächste', 'naechste → nächste'), + + # ö words + (re.compile(r'\boeffnen\b', re.IGNORECASE), 'öffnen', 'oeffnen → öffnen'), + (re.compile(r'\boeffentlich\b', re.IGNORECASE), 'öffentlich', 'oeffentlich → öffentlich'), + (re.compile(r'\bloesung\b', re.IGNORECASE), 'Lösung', 'loesung → Lösung'), + (re.compile(r'\bloeschen\b', re.IGNORECASE), 'löschen', 'loeschen → löschen'), + + # ü words + (re.compile(r'\bpruefung\b', re.IGNORECASE), 'Prüfung', 'pruefung → Prüfung'), + (re.compile(r'\bpruefen\b', re.IGNORECASE), 'prüfen', 'pruefen → prüfen'), + (re.compile(r'\bzurueck\b', re.IGNORECASE), 'zurück', 'zurueck → zurück'), + (re.compile(r'\bnatuerlich\b', re.IGNORECASE), 'natürlich', 'natuerlich → natürlich'), + (re.compile(r'\bverfuegbar\b', re.IGNORECASE), 'verfügbar', 'verfuegbar → 
verfügbar'), + (re.compile(r'\bgueltig\b', re.IGNORECASE), 'gültig', 'gueltig → gültig'), + (re.compile(r'\bungueltig\b', re.IGNORECASE), 'ungültig', 'ungueltig → ungültig'), +] + +GERMAN_FILE_EXTENSIONS = { + '.md', '.txt', '.html', '.tsx', '.ts', '.jsx', '.js', + '.json', '.py', '.yml', '.yaml', '.xml', '.vue' +} + + +def should_check_file(file_path: str) -> bool: + if not file_path: + return False + dot_index = file_path.rfind('.') + if dot_index == -1: + return False + ext = file_path[dot_index:].lower() + return ext in GERMAN_FILE_EXTENSIONS + + +def find_umlaut_errors(content: str) -> List[str]: + errors = [] + for pattern, correct, description in UMLAUT_RULES: + matches = pattern.findall(content) + if matches: + errors.append(f"{description} ({len(matches)}x)") + return errors + + +def main(): + try: + input_data = json.load(sys.stdin) + except json.JSONDecodeError: + sys.exit(0) + + tool_name = input_data.get("tool_name", "") + if tool_name not in ("Write", "Edit"): + sys.exit(0) + + tool_input = input_data.get("tool_input", {}) + file_path = tool_input.get("file_path", "") + content = tool_input.get("content", "") or tool_input.get("new_string", "") + + if not content or not should_check_file(file_path): + sys.exit(0) + + errors = find_umlaut_errors(content) + + if errors: + print(f"\n⚠️ UMLAUT WARNING in {file_path}:", file=sys.stderr) + for error in errors: + print(f" • {error}", file=sys.stderr) + print("\nUse correct German umlauts: ä, ö, ü, ß", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main()