diff --git a/.claude/hooks/session_start.sh b/.claude/hooks/session_start.sh new file mode 100755 index 000000000..a59f9cb1c --- /dev/null +++ b/.claude/hooks/session_start.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# OpenCC 專案啟動 Hook +# 此腳本在 Claude Code 會話開始時自動執行 + +cat << 'EOF' +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + 🔄 OpenCC - 開放中文轉換 (Open Chinese Convert) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +📚 快速參考文件: + • AGENTS.md - 專案架構與模組速覽 + • CONTRIBUTING.md - 貢獻指南(詞典、測試、開發流程) + • doc/ALGORITHM_AND_LIMITATIONS.md - 演算法與理論局限性 + +🛠️ 常用指令: + • bazel test //test/... - 執行所有測試 + • bazel test //data/dictionary:dictionary_test - 測試詞典 + • python3 data/scripts/sort_all.py data/dictionary - 排序所有詞典 + +📖 詞典位置:data/dictionary/*.txt + 設定檔案:data/config/*.json + 測試案例:test/testcases/testcases.json + +💡 提示:修改詞典後務必執行排序腳本,否則測試會失敗 + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +EOF diff --git a/.claude/skills/opencc-algorithm-explain.md b/.claude/skills/opencc-algorithm-explain.md new file mode 100644 index 000000000..776727ac4 --- /dev/null +++ b/.claude/skills/opencc-algorithm-explain.md @@ -0,0 +1,178 @@ +--- +name: opencc-algorithm-explain +description: 解釋 OpenCC 的核心演算法、架構設計與理論局限性 +tags: [algorithm, architecture, theory, opencc] +--- + +# OpenCC 演算法解釋技能 + +此技能協助解釋 OpenCC 的內部運作原理、設計決策與局限性。 + +## 使用時機 + +當用戶詢問: +- OpenCC 如何運作? +- 為什麼某些轉換會出錯? +- OpenCC 與其他轉換工具的差異? +- 如何改進轉換準確度? + +## 核心概念 + +### 1. 最大正向匹配分詞(MaxMatch) + +**原理**:從左到右掃描,每次匹配最長的詞條。 + +**範例**: + +詞典:`一`, `一個`, `一個人` +輸入:`一個人去` + +``` +位置 0: 嘗試 "一個人去" (4) → ✗ +位置 0: 嘗試 "一個人" (3) → ✓ +位置 3: 嘗試 "去" (1) → 保留 +結果: [一個人] [去] +``` + +**優點**: +- 快速(O(n),n 為文本長度) +- 確定性(相同輸入必定相同輸出) +- 無需訓練 + +**缺點**: +- 不考慮上下文 +- 貪婪策略可能非最優解 +- 依賴詞典完整性 + +### 2. 轉換鏈機制 + +**流程**:配置 → 分詞 → 多階段轉換 → 輸出 + +**範例**(`s2twp.json`): + +``` +簡體文字 + ↓ 階段 1: STPhrases + STCharacters +繁體文字 + ↓ 階段 2: TWPhrases +臺灣慣用詞 + ↓ 階段 3: TWVariants +臺灣標準繁體 +``` + +每階段的輸出成為下一階段的輸入。 + +### 3. 
詞典優先級 + +在 `DictGroup` 中,詞典依序查詢: + +``` +查詢 "干燥" → STPhrases (命中) → "乾燥" ✓ +查詢 "干" → STPhrases (未中) → STCharacters (命中) → "乾" +``` + +這實現「詞組優先,單字後備」策略。 + +## 理論局限性 + +### 1. 一對多歧義問題 + +**根本原因**:缺乏語義理解和上下文分析。 + +| 簡體 | 可能的繁體 | 語境範例 | +|------|-----------|---------| +| 干 | 乾、幹、干 | 乾燥、幹活、干擾 | +| 后 | 後、后 | 後天、皇后 | +| 面 | 麵、面 | 麵條、面孔 | + +**OpenCC 解法**:窮舉詞組 + +``` +# STPhrases.txt +干燥 乾燥 +干扰 干擾 +干活 幹活 +``` + +**局限**: +- 詞典會無限膨脹 +- 無法處理未登錄詞 +- 新詞、網路用語需要人工新增 + +### 2. 缺乏上下文理解 + +**問題案例**: + +``` +輸入: "后天就是星期一" +理想: "後天就是星期一" ✓ + +輸入: "皇后天生麗質" +可能: "皇後天生麗質" ✗ (應為 "皇后") +``` + +如果詞典有 `后天 → 後天`,第二句可能被錯誤分詞。 + +**根本原因**: +- MaxMatch 只看局部,不看全局 +- 不理解「皇后」是一個完整詞彙 +- 需要將「皇后」也加入詞典 + +### 3. 不使用語言模型 + +**對比**: + +| 方法 | 上下文 | 概率 | 新詞 | 速度 | 維護 | +|------|-------|------|------|------|------| +| OpenCC(規則) | ✗ | ✗ | ✗ | 極快 | 高 | +| 統計模型(N-gram) | 有限 | ✓ | 有限 | 中 | 中 | +| 神經網路(Transformer) | ✓ | ✓ | ✓ | 慢 | 低 | + +**OpenCC 的權衡**: +- 選擇速度、確定性、輕量級 +- 犧牲了語義理解和自動學習能力 + +### 4. 維護負擔 + +**數據規模**: +- `STPhrases.txt`:~60,000 詞條 +- `TWPhrases*.txt`:~50,000 詞條 + +**挑戰**: +- 每個錯誤案例可能需要新增多個詞條 +- 詞條衝突需要人工仲裁 +- 地區差異(大陸、臺灣、香港)需分別處理 +- 專業領域術語需專家審核 + +## 何時使用 OpenCC? + +**適合場景**: +- ✓ 通用文本轉換 +- ✓ 需要高效能(即時轉換) +- ✓ 需要確定性(相同輸入必定相同輸出) +- ✓ 輕量級部署(瀏覽器、移動端) + +**不適合場景**: +- ✗ 大量未登錄詞(新詞、網路用語) +- ✗ 高度專業化領域(需要語境理解) +- ✗ 需要自動學習新模式 + +## 改進建議 + +**短期**(在現有框架內): +1. 持續豐富詞典 +2. 社群回報問題並提交 PR +3. 針對特定領域建立自訂詞典 + +**長期**(架構升級): +1. 引入統計語言模型輔助歧義消解 +2. 使用深度學習處理未登錄詞 +3. 
提供混合模式(規則 + 模型) + +但這會增加複雜度和計算成本,需要仔細權衡。 + +## 參考資料 + +- **[doc/ALGORITHM_AND_LIMITATIONS.md](../../doc/ALGORITHM_AND_LIMITATIONS.md)** - 完整的演算法與局限性分析 +- **[AGENTS.md](../../AGENTS.md)** - 專案架構速覽 +- **[src/README.md](../../src/README.md)** - 核心模組技術文件 diff --git a/.claude/skills/opencc-dict-edit.md b/.claude/skills/opencc-dict-edit.md new file mode 100644 index 000000000..b989b124a --- /dev/null +++ b/.claude/skills/opencc-dict-edit.md @@ -0,0 +1,133 @@ +--- +name: opencc-dict-edit +description: 編輯 OpenCC 詞典並執行相關的測試流程 +tags: [dictionary, testing, opencc] +--- + +# OpenCC 詞典編輯技能 + +此技能協助編輯 OpenCC 詞典檔案,並確保正確執行測試驅動開發流程。 + +## 使用時機 + +當用戶要求: +- 新增或修改詞典條目 +- 修正轉換錯誤 +- 新增地區用詞 + +## 執行步驟 + +### 1. 確認詞典檔案 + +根據轉換類型選擇正確的詞典: + +**簡繁轉換基礎:** +- `data/dictionary/STCharacters.txt` - 簡→繁(單字) +- `data/dictionary/STPhrases.txt` - 簡→繁(詞組) +- `data/dictionary/TSCharacters.txt` - 繁→簡(單字) +- `data/dictionary/TSPhrases.txt` - 繁→簡(詞組) + +**臺灣地區用詞:** +- `data/dictionary/TWVariants.txt` - 臺灣異體字 +- `data/dictionary/TWPhrasesIT.txt` - 資訊科技用語 +- `data/dictionary/TWPhrasesName.txt` - 人名地名 +- `data/dictionary/TWPhrasesOther.txt` - 其他用語 + +**香港地區用詞:** +- `data/dictionary/HKVariants.txt` + +**日文:** +- `data/dictionary/JPShinjitaiCharacters.txt` +- `data/dictionary/JPShinjitaiPhrases.txt` + +### 2. 撰寫測試案例(TDD) + +**重要**:先寫測試,確保修改前測試會失敗。 + +編輯 `test/testcases/testcases.json`: + +```json +{ + "id": "case_XXX", + "input": "輸入文字", + "expected": { + "s2t": "預期輸出", + "s2tw": "預期輸出(臺灣)", + "s2twp": "預期輸出(臺灣含慣用詞)" + } +} +``` + +**配置選擇**: +- 修改 `STPhrases.txt` → 測試 `s2t`, `s2tw`, `s2twp`, `s2hk` +- 修改 `TWPhrases*.txt` → 測試 `s2tw`, `s2twp` +- 修改 `HKVariants.txt` → 測試 `s2hk` + +### 3. 執行測試(確認失敗) + +```bash +bazel test //test:opencc_test +``` + +應該看到新測試案例失敗。 + +### 4. 編輯詞典 + +**格式**:`來源目標` + +**注意事項**: +- 使用 Tab 字元(`\t`),不是空格 +- 一行一個條目 +- UTF-8 編碼 + +範例: +``` +虚伪叹息 虛偽嘆息 +``` + +### 5. 
排序詞典 + +**重要**:詞典必須排序,否則測試會失敗。 + +單一檔案: +```bash +python3 data/scripts/sort.py data/dictionary/STPhrases.txt +``` + +所有檔案: +```bash +python3 data/scripts/sort_all.py data/dictionary +``` + +### 6. 執行測試(確認成功) + +```bash +bazel test //test:opencc_test +bazel test //data/dictionary:dictionary_test +``` + +所有測試應該通過。 + +### 7. 提交變更 + +```bash +git add data/dictionary/*.txt test/testcases/testcases.json +git commit -m "新增詞典條目:[簡要描述]" +``` + +## 常見陷阱 + +- ❌ 使用空格而非 Tab → 格式錯誤 +- ❌ 忘記排序 → `DictionaryTest` 失敗 +- ❌ 未先寫測試 → 無法驗證修改效果 +- ❌ 測試配置不完整 → 遺漏某些轉換場景 + +## 檢查清單 + +- [ ] 選擇了正確的詞典檔案 +- [ ] 在 `testcases.json` 新增測試案例 +- [ ] 執行測試確認失敗 +- [ ] 編輯詞典(使用 Tab 分隔) +- [ ] 執行排序腳本 +- [ ] 執行測試確認成功 +- [ ] 提交變更 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..3e1f41443 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,84 @@ +# OpenCC 專案速覽 + +本文檔彙整目前代理掌握的 Open Chinese Convert(OpenCC)專案資訊,協助快速熟悉程式碼結構、資料組織與配套工具。 + +## 專案概述 +- OpenCC 是一套開源的中文簡繁體與地區用詞轉換工具,支援簡↔繁、港澳臺差異、日文新舊字形等多種轉換方案。 +- 專案同時提供 C++ 核心程式庫、C 語言介面、命令列工具,以及 Python、Node.js 等語言綁定,詞庫與程式解耦,方便自訂與擴充。 +- 主要相依:`rapidjson` 解析設定,`marisa-trie` 處理高效能詞典(`.ocd2`),可選 `Darts` 以支援舊版 `.ocd`。 + +## 核心模組與流程 +1. **設定載入 (`src/Config.cpp`)** + - 讀取 JSON 設定(位於 `data/config/*.json`),解析分詞器定義與轉換鏈。 + - 依 `type` 欄位載入不同格式的詞典(純文字、`ocd2`、詞典組),並支援附加搜尋路徑。 + - 建立 `Converter` 物件,持有分詞器與轉換鏈。 + +2. **分詞 (`src/MaxMatchSegmentation.cpp`)** + - 預設分詞型態為 `mmseg`,即最大正向匹配。 + - 以詞典做最長前綴匹配,將輸入切成 `Segments`;無法匹配的 UTF-8 片段依字元長度保留。 + +3. **轉換鏈 (`src/ConversionChain.cpp`, `src/Conversion.cpp`)** + - 轉換鏈是有序的 `Conversion` 清單,每個節點依賴一個詞典,透過最長前綴匹配把片段替換為目標值。 + - 支援詞組優先、異體字替換、多階段組合等進階情境。 + +4. **詞典系統** + - 抽象介面 `Dict` 統一前綴匹配、全前綴匹配與詞典遍歷。 + - `TextDict` (`.txt`) 由制表符純文字建構詞典;`MarisaDict` (`.ocd2`) 提供高效能字典樹;`DictGroup` 可將多個詞典依序組成集合。 + - `SerializableDict` 定義序列化與檔案載入邏輯,命令列工具據此在不同格式間互轉。 + +5. 
**API 封裝** + - `SimpleConverter`(高階 C++ 介面)封裝 `Config + Converter`,提供字串、指標緩衝、部分長度轉換等多種多載。 + - `opencc.h` 暴露 C API:`opencc_open`、`opencc_convert_utf8` 等,供語言綁定與命令列重用。 + - 命令列程式 `opencc`(`src/tools/CommandLine.cpp`)示範批次轉換、串流讀取、自動刷新與同檔案輸入輸出處理。 + +## 資料與設定 +- 詞庫維護在 `data/dictionary/*.txt`,涵蓋短語、單字、地區差異、日文新字等專題檔;建置時轉成 `.ocd2` 加速。 +- 預設設定位於 `data/config/`,如 `s2t.json`、`t2s.json`、`s2tw.json` 等,定義分詞器型態、使用的詞典與組合方式。 +- `data/scheme` 與 `data/scripts` 提供詞庫編譯腳本與規範校驗工具。 + +### 詞典二進位格式:`.ocd` 與 `.ocd2` +- `.ocd`(舊格式)以 `OPENCCDARTS1` 為檔頭,主體為 Darts double-array trie 的序列化資料,搭配 `BinaryDict` 結構保存鍵值偏移與拼接緩衝,載入流程見 `src/DartsDict.cpp` 與 `src/BinaryDict.cpp`。常用於需要 `ENABLE_DARTS` 的相容環境。 +- `.ocd2`(預設格式)以 `OPENCC_MARISA_0.2.5` 為檔頭,接著寫入 `marisa::Trie` 資料,然後用 `SerializedValues` 模組保存所有候選值列表,詳見 `src/MarisaDict.cpp`、`src/SerializedValues.cpp`。此格式體積更小、載入更快(例如 `NEWS.md` 記錄 `STPhrases` 從 4.3MB 縮減至 924KB)。 +- 命令列工具 `opencc_dict` 支援 `text ↔ ocd2`(以及可選 `ocd`)互轉,新增或調整詞典時先編輯 `.txt`,再執行工具產生目標格式。 + +## 開發與測試 +- 頂層建置系統支援 CMake、Bazel、Node.js 的 `binding.gyp`、Python `pyproject.toml`,跨平台整合 CI。 +- `src/*Test.cpp`、`test/` 目錄包含 Google Test 風格的單元測試,涵蓋詞典匹配、轉換鏈、分詞等關鍵邏輯。 +- 工具 `opencc_dict`、`opencc_phrase_extract`(`src/tools/`)協助開發者轉換詞庫格式、抽取短語。 + +## 生態綁定 +- Python 模組位於 `python/`,透過 C API 提供 `OpenCC` 類別。 +- Node.js 擴充在 `node/` 目錄,使用 N-API/Node-API 呼叫核心程式庫。 +- README 列出第三方 Swift、Java、Go、WebAssembly 等移植專案,展示生態廣度。 + +## 常見自訂步驟 +1. 編輯或新增 `data/dictionary/*.txt` 詞條。 +2. 使用 `opencc_dict` 轉換為 `.ocd2`。 +3. 在 `data/config` 複製/修改設定 JSON 並指定新的詞典檔案。 +4. 
透過 `SimpleConverter`、命令列工具或語言綁定載入自訂設定驗證效果。 + +> 若需更深入,可閱讀 `src/README.md` 的模組說明,或參考 `test/` 下的案例理解轉換鏈組合。 + +## 瀏覽器與第三方實作注意事項 +- 官方未直接支援純前端執行,社群方案(如 `opencc-js`、`wasm-opencc`)可供參考。 +- 若自行編譯 WebAssembly,可用 Emscripten 將 `.ocd2` 寫入虛擬檔案系統,在 Web Worker 中呼叫轉換以避免阻塞 UI,並搭配 gzip/brotli 與 Service Worker 快取降低首次載入成本。 +- 純 JavaScript 查表可先將詞典預處理為 JSON/Trie 結構,手寫最長前綴匹配;請留意控制資源體積,並在轉換長文本時避免多餘字串拷貝。 + +### 第三方實作常見偏差(推測) +- **缺少分詞與轉換鏈順序**:若未還原 `group` 設定或詞典優先級,複合詞可能被拆開或被單字覆蓋。 +- **最長前綴邏輯缺失**:只按字元替換會遺漏成語、多字詞結果。 +- **UTF-8 處理不嚴謹**:疏漏多位元組字元或 surrogate pair 處理,容易位移或截斷。 +- **詞典/設定不完整**:缺少分詞詞典、地區差異等 `.ocd2`,輸出會缺詞。 +- **路徑與載入流程差異**:若未遵循 OpenCC 的路徑搜尋與設定解析細節,實際載入的資源與官方不同,結果自然偏差。 + +## 延伸閱讀 + +### 技術文件 +- **[演算法與理論局限性分析](doc/ALGORITHM_AND_LIMITATIONS.md)** - 深入探討 OpenCC 的核心演算法(最大正向匹配分詞)、轉換鏈機制、詞典系統,以及在中文簡繁轉換中面臨的理論局限性(一對多歧義、缺乏上下文理解、維護負擔等)。 + +### 貢獻指南 +- **[CONTRIBUTING.md](CONTRIBUTING.md)** - 如何為 OpenCC 貢獻詞典條目、撰寫測試案例、執行測試流程的完整說明。 + +### 專案文件 +- **[src/README.md](src/README.md)** - 核心模組的詳細技術說明。 +- **[README.md](README.md)** - 專案總覽、安裝與使用指南。 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..43c994c2d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +@AGENTS.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..9b996d349 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,296 @@ +# 貢獻指南 + +感謝您對 OpenCC 專案的貢獻!本文件說明如何為 OpenCC 貢獻詞典條目、撰寫測試並確保程式碼品質。 + +## 目錄 + +- [新增詞典條目](#新增詞典條目) +- [排序詞典](#排序詞典) +- [執行測試](#執行測試) +- [撰寫測試案例](#撰寫測試案例) +- [簡轉繁轉換的特殊注意事項](#簡轉繁轉換的特殊注意事項) + +## 新增詞典條目 + +### 1. 
選擇正確的詞典檔案 + +詞典檔案位於 `data/dictionary/` 目錄下,根據轉換類型選擇對應的檔案: + +- **簡繁轉換** + - `STCharacters.txt` - 簡體到繁體(單字) + - `STPhrases.txt` - 簡體到繁體(詞組) + - `TSCharacters.txt` - 繁體到簡體(單字) + - `TSPhrases.txt` - 繁體到簡體(詞組) + +- **臺灣地區用詞** + - `TWVariants.txt` - 臺灣異體字 + - `TWPhrasesIT.txt` - 臺灣資訊科技用語 + - `TWPhrasesName.txt` - 臺灣人名地名 + - `TWPhrasesOther.txt` - 臺灣其他用語 + +- **香港地區用詞** + - `HKVariants.txt` - 香港異體字 + - `HKVariantsRevPhrases.txt` - 香港異體字反向詞組 + +- **日文新舊字形** + - `JPShinjitaiCharacters.txt` - 日文新字體(單字) + - `JPShinjitaiPhrases.txt` - 日文新字體(詞組) + - `JPVariants.txt` - 日文異體字 + +### 2. 詞典格式規範 + +詞典檔案使用 **Tab 字元**(`\t`)分隔來源詞與目標詞,**請勿使用空格**。 + +格式:`來源詞目標詞` + +範例: + +``` +虚伪叹息 虛偽嘆息 +潮湿灶台 潮濕灶台 +赞叹 讚歎 +``` + +如果一個來源詞對應多個可能的目標詞,使用空格分隔: + +``` +一出 一齣 一出 +``` + +### 3. 編輯詞典 + +使用文字編輯器開啟對應的 `.txt` 檔案,新增您的詞條。請確保: + +1. 使用 **Tab 字元**(`\t`)分隔來源詞與目標詞 +2. 每行一個條目 +3. 檔案使用 UTF-8 編碼 + +## 排序詞典 + +**重要**:詞典檔案必須按字典序排序,否則測試會失敗。 + +### 使用排序工具 + +專案提供了自動排序工具,位於 `data/scripts/` 目錄: + +#### 排序單一檔案 + +```bash +python3 data/scripts/sort.py data/dictionary/STPhrases.txt +``` + +這會直接排序並覆蓋原檔案。如果想輸出到其他檔案: + +```bash +python3 data/scripts/sort.py data/dictionary/STPhrases.txt data/dictionary/STPhrases_sorted.txt +``` + +#### 排序所有詞典檔案 + +```bash +python3 data/scripts/sort_all.py data/dictionary +``` + +這會自動排序 `data/dictionary/` 目錄下所有 `.txt` 檔案。 + +### 排序檢查 + +排序是否正確會在測試時自動檢查。如果詞典未排序或包含重複的鍵,`DictionaryTest` 會報錯: + +``` +[ FAILED ] DictionaryTest/STPhrases.UniqueSortedTest +STPhrases is not sorted. +``` + +遇到此錯誤時,請執行排序工具重新排序。 + +## 執行測試 + +OpenCC 使用 [Bazel](https://bazel.build/) 作為建置系統。 + +### 安裝 Bazel + +#### macOS + +```bash +brew install bazel +``` + +#### Ubuntu/Debian + +```bash +sudo apt install bazel +``` + +或參考 [Bazel 官方安裝指南](https://bazel.build/install)。 + +#### 其他作業系統 + +請參考 [Bazel 安裝文件](https://bazel.build/install) 獲取適合您系統的安裝方式。 + +### 執行所有測試 + +```bash +bazel test --test_output=all //src/... //data/... //test/... //python/... 
+``` + +### 執行特定測試 + +僅測試詞典: + +```bash +bazel test //data/dictionary:dictionary_test +``` + +僅測試轉換案例: + +```bash +bazel test //test:opencc_test +``` + +### 測試輸出 + +- `--test_output=all`:顯示所有測試輸出 +- `--test_output=errors`:僅顯示失敗的測試 + +## 撰寫測試案例 + +### 測試驅動開發流程 + +在修改詞典前,建議先撰寫測試案例,遵循測試驅動開發(TDD)流程: + +1. **先寫測試**:在 `test/testcases/testcases.json` 新增測試案例 +2. **確認測試失敗**:執行測試,確認新案例因為詞典未更新而失敗 +3. **修改詞典**:新增或修改詞典條目 +4. **測試通過**:再次執行測試,確認修改後測試通過 + +這樣可以確保您的修改確實達到預期效果。 + +### 測試案例格式 + +測試案例定義於 `test/testcases/testcases.json`,格式如下: + +```json +{ + "cases": [ + { + "id": "case_xxx", + "input": "輸入文字", + "expected": { + "s2t": "預期的簡轉繁輸出", + "s2tw": "預期的簡轉臺灣正體輸出", + "t2s": "預期的繁轉簡輸出" + } + } + ] +} +``` + +### 欄位說明 + +- `id`:唯一的測試案例識別碼,建議使用 `case_` 前綴加流水號 +- `input`:輸入文字 +- `expected`:各種轉換配置的預期輸出 + - 僅需包含您要測試的轉換配置 + - 可以同時測試多種配置 + +### 可用的轉換配置 + +- `s2t` - 簡體到繁體 +- `s2tw` - 簡體到臺灣正體 +- `s2twp` - 簡體到臺灣正體(含慣用詞) +- `s2hk` - 簡體到香港繁體 +- `t2s` - 繁體到簡體 +- `t2tw` - 繁體到臺灣正體 +- `tw2s` - 臺灣正體到簡體 +- `tw2sp` - 臺灣正體到簡體(含慣用詞) +- `hk2s` - 香港繁體到簡體 +- `hk2t` - 香港繁體到繁體 +- `t2hk` - 繁體到香港繁體 +- `tw2t` - 臺灣正體到繁體 +- `jp2t` - 日文新字體到繁體 +- `t2jp` - 繁體到日文新字體 + +### 範例 + +```json +{ + "id": "case_100", + "input": "鼠标和键盘是计算机的输入设备", + "expected": { + "s2t": "鼠標和鍵盤是計算機的輸入設備", + "s2tw": "鼠標和鍵盤是計算機的輸入設備", + "s2twp": "滑鼠和鍵盤是電腦的輸入裝置" + } +} +``` + +## 簡轉繁轉換的特殊注意事項 + +當您修改簡轉繁相關詞典時,需要特別注意不同地區的轉換配置可能都會受到影響。 + +### 涉及的配置檔案 + +簡轉繁轉換主要涉及以下配置: + +1. **`s2t.json`** - 基本簡轉繁 + - 使用 `STPhrases.txt` 和 `STCharacters.txt` + +2. **`s2tw.json`** - 簡體轉臺灣正體 + - 使用 `STPhrases.txt`、`STCharacters.txt` + - 額外使用 `TWVariants.txt` + +3. **`s2twp.json`** - 簡體轉臺灣正體(含慣用詞) + - 使用 `STPhrases.txt`、`STCharacters.txt` + - 額外使用 `TWPhrases.txt`、`TWVariants.txt` + +4. 
**`s2hk.json`** - 簡體轉香港繁體 + - 使用 `STPhrases.txt`、`STCharacters.txt` + - 額外使用 `HKVariants.txt` + +### 測試建議 + +修改 `STPhrases.txt` 或 `STCharacters.txt` 時,建議在 `testcases.json` 中同時測試多個相關配置: + +```json +{ + "id": "case_example", + "input": "简体文字", + "expected": { + "s2t": "繁體文字", + "s2tw": "繁體文字", + "s2twp": "臺灣慣用詞", + "s2hk": "香港繁體" + } +} +``` + +這樣可以確保您的修改在各種轉換情境下都能正確運作。 + +### 常見情況 + +- **僅修改基本簡繁對應**:修改 `STCharacters.txt`,測試至少包含 `s2t` +- **修改詞組轉換**:修改 `STPhrases.txt`,測試包含 `s2t`、`s2tw`、`s2twp`、`s2hk` +- **臺灣特有用詞**:修改 `TWPhrases*.txt` 或 `TWVariants.txt`,測試包含 `s2tw`、`s2twp` +- **香港特有用詞**:修改 `HKVariants*.txt`,測試包含 `s2hk` + +## 提交變更 + +完成修改後,請確認: + +- [ ] 詞典檔案已使用 Tab 字元分隔 +- [ ] 詞典檔案已正確排序(執行 `sort.py` 或 `sort_all.py`) +- [ ] 已新增對應的測試案例到 `testcases.json` +- [ ] 修改前測試案例失敗,修改後測試通過 +- [ ] 所有測試通過(`bazel test --test_output=all //src/... //data/... //test/...`) + +符合以上條件後,即可提交 Pull Request。 + +## 需要協助? + +如有任何問題,歡迎: + +- 在 [GitHub Issues](https://github.com/BYVoid/OpenCC/issues) 提問 +- 加入 [Telegram 討論群組](https://t.me/open_chinese_convert) + +感謝您的貢獻! diff --git a/data/config/BUILD.bazel b/data/config/BUILD.bazel index ea3ee617d..e369163f4 100644 --- a/data/config/BUILD.bazel +++ b/data/config/BUILD.bazel @@ -4,3 +4,19 @@ filegroup( name = "config", srcs = glob(["*.json"]), ) + +cc_test( + name = "config_dict_validation_test", + srcs = ["ConfigDictValidationTest.cpp"], + data = [ + ":config", + "//data/dictionary:binary_dictionaries", + "//test/testcases", + ], + deps = [ + "//src:simple_converter", + "@bazel_tools//tools/cpp/runfiles", + "@googletest//:gtest_main", + "@rapidjson", + ], +) diff --git a/data/config/ConfigDictValidationTest.cpp b/data/config/ConfigDictValidationTest.cpp new file mode 100644 index 000000000..412baa93e --- /dev/null +++ b/data/config/ConfigDictValidationTest.cpp @@ -0,0 +1,109 @@ +/* + * Open Chinese Convert + * + * End-to-end validation of all configs against consolidated testcases.json. 
+ */ + +#ifndef BAZEL +// This test is Bazel-only; CMake builds should skip compiling it. +static_assert(false, "ConfigDictValidationTest is only supported under Bazel"); +#else + +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "rapidjson/document.h" +#include "src/SimpleConverter.hpp" + +#include "tools/cpp/runfiles/runfiles.h" +using bazel::tools::cpp::runfiles::Runfiles; + +namespace opencc { +namespace { + +class ConfigDictValidationTest : public ::testing::Test { +protected: + void SetUp() override { +#ifdef BAZEL + runfiles_.reset(Runfiles::CreateForTest()); + ASSERT_NE(nullptr, runfiles_); + testcasesPath_ = runfiles_->Rlocation("_main/test/testcases/testcases.json"); + configDir_ = runfiles_->Rlocation("_main/data/config"); + dictDir_ = runfiles_->Rlocation("_main/data/dictionary"); +#else + FAIL() << "This test expects Bazel runfiles."; +#endif + } + + std::string ReadFile(const std::string& path) { + std::ifstream ifs(path); + EXPECT_TRUE(ifs.is_open()) << path; + std::stringstream buffer; + buffer << ifs.rdbuf(); + return buffer.str(); + } + + SimpleConverter& GetConverter(const std::string& config) { + auto it = converters_.find(config); + if (it != converters_.end()) { + return *it->second; + } + const std::string configPath = configDir_ + "/" + config + ".json"; + auto inserted = converters_.emplace( + config, + std::make_unique(configPath, + std::vector{ + configDir_, dictDir_})); + return *inserted.first->second; + } + + std::unique_ptr runfiles_; + std::string testcasesPath_; + std::string configDir_; + std::string dictDir_; + std::unordered_map> + converters_; +}; + +TEST_F(ConfigDictValidationTest, ConvertExpectedOutputs) { + const std::string json = ReadFile(testcasesPath_); + rapidjson::Document doc; + doc.Parse(json.c_str()); + ASSERT_FALSE(doc.HasParseError()); + ASSERT_TRUE(doc.IsObject()); + ASSERT_TRUE(doc.HasMember("cases")); + const auto& cases = doc["cases"]; + ASSERT_TRUE(cases.IsArray()); + + 
for (auto& testcase : cases.GetArray()) { + ASSERT_TRUE(testcase.IsObject()); + ASSERT_TRUE(testcase.HasMember("input")); + ASSERT_TRUE(testcase["input"].IsString()); + const std::string input = testcase["input"].GetString(); + const std::string id = + testcase.HasMember("id") && testcase["id"].IsString() + ? testcase["id"].GetString() + : "(unknown id)"; + ASSERT_TRUE(testcase.HasMember("expected")); + const auto& expectedObj = testcase["expected"]; + ASSERT_TRUE(expectedObj.IsObject()); + for (auto itr = expectedObj.MemberBegin(); itr != expectedObj.MemberEnd(); + ++itr) { + const std::string config = itr->name.GetString(); + ASSERT_TRUE(itr->value.IsString()); + const std::string expected = itr->value.GetString(); + SimpleConverter& converter = GetConverter(config); + EXPECT_EQ(expected, converter.Convert(input)) + << "config=" << config << " case=" << id; + } + } +} + +} // namespace +} // namespace opencc + +#endif // BAZEL diff --git a/data/dictionary/BUILD.bazel b/data/dictionary/BUILD.bazel index 3a1f1aa0a..253f656f3 100644 --- a/data/dictionary/BUILD.bazel +++ b/data/dictionary/BUILD.bazel @@ -13,6 +13,14 @@ genrule( tools = ["//data/scripts:merge"], ) +# These component files are only used for merge_TWPhrases and should not +# produce standalone .ocd2 outputs. 
+PHRASE_PARTS = [ + "TWPhrasesIT.txt", + "TWPhrasesName.txt", + "TWPhrasesOther.txt", +] + [ genrule( name = "reverse_" + txt, @@ -30,7 +38,7 @@ genrule( ] ] -TEXT_DICTS = glob(["*.txt"]) + [ +TEXT_DICTS = glob(["*.txt"], exclude = PHRASE_PARTS) + [ "TWPhrases.txt", "TWVariantsRev.txt", "TWPhrasesRev.txt", diff --git a/data/dictionary/DictionaryTest.cpp b/data/dictionary/DictionaryTest.cpp index 7b931c722..24776214a 100644 --- a/data/dictionary/DictionaryTest.cpp +++ b/data/dictionary/DictionaryTest.cpp @@ -46,13 +46,12 @@ std::string DictionaryTest::runfile_dir_; INSTANTIATE_TEST_SUITE_P( , DictionaryTest, - ::testing::Values("HKVariants", "HKVariantsRevPhrases", - "JPShinjitaiCharacters", "JPShinjitaiPhrases", - "JPVariants", "STCharacters", "STPhrases", "TSCharacters", - "TSPhrases", "TWPhrasesIT", "TWPhrasesName", - "TWPhrasesOther", "TWVariants", "TWVariantsRevPhrases", - "TWPhrases", "TWVariantsRev", "TWPhrasesRev", - "HKVariantsRev", "JPVariantsRev"), + ::testing::Values( + "HKVariants", "HKVariantsRev", "HKVariantsRevPhrases", + "JPShinjitaiCharacters", "JPShinjitaiPhrases", "JPVariants", + "JPVariantsRev", "STCharacters", "STPhrases", "TSCharacters", + "TSPhrases", "TWPhrases", "TWPhrasesRev", "TWVariants", + "TWVariantsRev", "TWVariantsRevPhrases"), [](const testing::TestParamInfo& info) { return info.param; }); diff --git a/data/dictionary/STPhrases.txt b/data/dictionary/STPhrases.txt index d4add2b11..f58f84bfa 100644 --- a/data/dictionary/STPhrases.txt +++ b/data/dictionary/STPhrases.txt @@ -39447,6 +39447,7 @@ 胖姑娘坐小轿儿 胖姑娘坐小轎兒 胚叶 胚葉 胚胎发生 胚胎發生 +胚胎发育 胚胎發育 胚胎干 胚胎幹 胜不骄 勝不驕 胜不骄败不馁 勝不驕敗不餒 diff --git a/data/dictionary/TWPhrasesOther.txt b/data/dictionary/TWPhrasesOther.txt index 7a87ae701..9285daeb9 100644 --- a/data/dictionary/TWPhrasesOther.txt +++ b/data/dictionary/TWPhrasesOther.txt @@ -24,6 +24,7 @@ 自行車 腳踏車 詞組 片語 蹦極 高空彈跳 +軟體動物 軟體動物 輔音 子音 通過 透過 通過 酰 醯 diff --git a/data/dictionary/TWVariants.txt b/data/dictionary/TWVariants.txt index 023a0687b..f4bf63b1d 
100644 --- a/data/dictionary/TWVariants.txt +++ b/data/dictionary/TWVariants.txt @@ -26,6 +26,7 @@ 纔 才 羣 群 脣 唇 +臺 台 蔘 參 蔿 蒍 衆 眾 diff --git a/doc/ALGORITHM_AND_LIMITATIONS.md b/doc/ALGORITHM_AND_LIMITATIONS.md new file mode 100644 index 000000000..254c6dab8 --- /dev/null +++ b/doc/ALGORITHM_AND_LIMITATIONS.md @@ -0,0 +1,443 @@ +# OpenCC 演算法與理論局限性分析 + +本文件深入探討 OpenCC 的核心演算法設計、實作細節,以及在中文簡繁轉換領域中面臨的理論局限性。 + +## 目錄 + +- [核心模組架構](#核心模組架構) +- [最大正向匹配分詞算法](#最大正向匹配分詞算法) +- [轉換鏈機制](#轉換鏈機制) +- [詞典系統](#詞典系統) +- [理論局限性](#理論局限性) +- [與現代方法的比較](#與現代方法的比較) + +## 核心模組架構 + +OpenCC 採用模組化設計,主要包含以下核心元件: + +``` +配置層 (Config) + ↓ +分詞器 (Segmentation) + ↓ +轉換鏈 (ConversionChain) + ↓ +詞典查詢 (Dict) +``` + +### 執行流程 + +1. **載入配置** (`src/Config.cpp`) + - 解析 JSON 設定檔(例如 `s2t.json`) + - 初始化分詞器與詞典 + - 建立轉換鏈 + +2. **文本分詞** (`src/MaxMatchSegmentation.cpp`) + - 將輸入文本切分為片段(Segments) + - 使用詞典進行最大正向匹配 + +3. **鏈式轉換** (`src/ConversionChain.cpp`) + - 依序通過多個轉換節點 + - 每個節點查詢對應詞典進行替換 + +4. **輸出結果** + - 合併所有片段,返回轉換後的文本 + +## 最大正向匹配分詞算法 + +### 演算法原理 + +OpenCC 預設使用 **最大正向匹配**(Maximum Forward Matching, MaxMatch)演算法進行分詞。這是一種貪婪演算法,從左到右掃描文本,每次嘗試匹配最長的詞條。 + +### 演算法步驟 + +``` +輸入:文本 T,詞典 D +輸出:片段列表 S + +i = 0 +while i < len(T): + max_len = 0 + matched = None + + # 從當前位置開始,嘗試匹配所有可能的前綴 + for length in range(MAX_WORD_LENGTH, 0, -1): + prefix = T[i:i+length] + if prefix in D: + max_len = length + matched = prefix + break + + if matched: + S.append(Segment(matched, is_word=True)) + i += max_len + else: + # 無法匹配,保留單個字元 + S.append(Segment(T[i], is_word=False)) + i += 1 +``` + +### 實作細節 + +在 `src/MaxMatchSegmentation.cpp` 中,核心實作使用 MARISA trie 或 Darts double-array trie 進行高效前綴匹配: + +```cpp +// 偽代碼示意 +Optional MatchPrefix(const string& text) { + // 使用 trie 查找最長前綴 + size_t max_length = 0; + for (auto result : trie.common_prefix_search(text)) { + max_length = max(max_length, result.length); + } + return max_length > 0 ? Optional(max_length) : None; +} +``` + +### 範例 + +假設詞典包含:`一`, `一個`, `一個人`, `人` + +輸入文本:`一個人` + +執行過程: +1. 
位置 0:嘗試匹配 `一個人`(3字)→ 命中 ✓ +2. 結果:`[一個人]` + +輸入文本:`一個人去` + +執行過程: +1. 位置 0:嘗試匹配 `一個人去`(4字)→ 未命中 +2. 位置 0:嘗試匹配 `一個人`(3字)→ 命中 ✓ +3. 位置 3:嘗試匹配 `去`(1字)→ 未命中(但保留為單字元片段) +4. 結果:`[一個人] [去]` + +## 轉換鏈機制 + +### 多階段轉換 + +OpenCC 支援多階段轉換,透過 `conversion_chain` 配置實現。以 `s2twp.json` 為例: + +```json +{ + "conversion_chain": [ + { + "dict": { + "type": "group", + "dicts": [ + {"type": "ocd2", "file": "STPhrases.ocd2"}, + {"type": "ocd2", "file": "STCharacters.ocd2"} + ] + } + }, + { + "dict": {"type": "ocd2", "file": "TWPhrases.ocd2"} + }, + { + "dict": {"type": "ocd2", "file": "TWVariants.ocd2"} + } + ] +} +``` + +### 轉換流程 + +1. **第一階段**:基本簡繁轉換 + - 使用 `STPhrases.ocd2`(詞組優先) + - 使用 `STCharacters.ocd2`(單字後備) + +2. **第二階段**:臺灣慣用詞替換 + - 使用 `TWPhrases.ocd2` + - 例如:`計算機` → `電腦` + +3. **第三階段**:臺灣異體字調整 + - 使用 `TWVariants.ocd2` + - 例如:`爲` → `為` + +每個階段的輸出會成為下一階段的輸入,形成鏈式處理。 + +### 詞典組合(DictGroup) + +在同一階段中,可以將多個詞典組合使用。查詢時會依序嘗試每個詞典,返回第一個匹配結果: + +```cpp +// 偽代碼 +Optional DictGroup::Match(const string& key) { + for (auto& dict : dicts) { + auto result = dict->Match(key); + if (result.has_value()) { + return result; // 返回第一個匹配 + } + } + return None; +} +``` + +這允許實現「詞組優先,單字後備」的策略。 + +## 詞典系統 + +### 詞典格式 + +OpenCC 支援多種詞典格式: + +1. **TextDict** (`.txt`) + - 純文字格式:`來源目標` + - 開發時使用,便於編輯 + +2. **MarisaDict** (`.ocd2`) + - 基於 MARISA trie 的二進位格式 + - 高效能、低記憶體佔用 + - 生產環境預設格式 + +3. **DartsDict** (`.ocd`) + - 基於 Darts double-array trie(舊格式) + - 相容性支援 + +### 前綴匹配 + +所有詞典實作都支援最長前綴匹配(Longest Prefix Match): + +```cpp +// Dict 介面 +class Dict { +public: + // 精確匹配 + virtual Optional> Match(const string& key) = 0; + + // 最長前綴匹配 + virtual Optional MatchPrefix(const string& text) = 0; + + // 所有前綴匹配 + virtual vector MatchAllPrefixes(const string& text) = 0; +}; +``` + +MARISA trie 的 `common_prefix_search` 方法能高效查找所有匹配的前綴,演算法複雜度為 O(k),其中 k 是匹配前綴的數量。 + +## 理論局限性 + +OpenCC 採用基於規則和詞典的方法,在實務上已相當成熟,但也存在一些根本性的理論局限。 + +### 1. 
缺乏上下文理解 + +**問題**:最大正向匹配演算法是局部貪婪策略,不考慮上下文語境。 + +**範例**: + +``` +輸入:后天就是星期一了 +``` + +理想情況下,「后天」應該理解為「後天」(the day after tomorrow),但如果詞典中同時存在: +- `后` → `後` +- `后天` → `後天` + +演算法會貪婪地匹配最長的 `后天`,這在大多數情況下是正確的。但如果遇到: + +``` +輸入:皇后天生麗質 +``` + +這裡的「后」應該匹配到「皇后」而非「后天」。由於缺乏語境理解,可能產生錯誤的分詞和轉換。 + +**現實影響**: + +這類問題在 OpenCC 中透過精心設計詞典和優先級規則來緩解,但無法從根本上解決。例如: +- 將常見詞組(如「皇后」)加入詞典 +- 調整轉換鏈順序 +- 使用更長的詞組優先匹配 + +但這些都是啟發式(heuristic)方法,無法涵蓋所有語境。 + +### 2. 一對多歧義問題 + +**問題**:中文簡繁轉換中存在大量的「一對多」對應關係。 + +**經典範例**: + +| 簡體字 | 繁體字(多種可能) | 語境 | +|--------|-------------------|------| +| 干 | 乾、幹、干 | 乾燥、幹活、干擾 | +| 后 | 後、后 | 後天、皇后 | +| 面 | 麵、面 | 麵條、面孔 | +| 发 | 發、髮 | 發展、頭髮 | +| 制 | 製、制 | 製造、制度 | + +**OpenCC 的解決方案**: + +OpenCC 透過 **詞組優先** 策略來處理一對多問題: + +``` +# STPhrases.txt(詞組優先) +干燥 乾燥 +干扰 干擾 +干活 幹活 +皇后 皇后 +后天 後天 + +# STCharacters.txt(單字後備) +干 乾 +后 後 +``` + +這種方法的優點: +- 對常見詞組有良好效果 +- 不需要複雜的語言模型 +- 效能高,可預測性強 + +這種方法的局限: +- **詞表覆蓋問題**:需要窮舉所有可能的詞組組合,詞典會持續膨脹 +- **新詞問題**:遇到詞典中沒有的新詞或領域專有名詞時,只能退回到單字轉換,容易出錯 +- **維護負擔重**:需要持續收集錯誤案例,手動新增詞條 + +**範例說明**: + +假設輸入:`这个干燥剂很干净` + +1. 分詞結果:`[这个] [干燥] [剂] [很] [干净]` +2. 轉換結果:`這個乾燥劑很乾淨` ✓ + +但如果輸入:`干净的干燥环境`(詞典中沒有「干净」) + +1. 分詞結果:`[干] [净] [的] [干燥] [环境]` +2. 轉換結果:`乾淨的乾燥環境` + +這裡第一個「干」可能被誤判(如果用戶本意是「幹淨」的另一種用法)。 + +### 3. 不使用概率與語言模型 + +**問題**:OpenCC 不使用統計語言模型或機率分佈來選擇最佳轉換。 + +**對比現代方法**: + +| 方法 | OpenCC(規則) | 基於統計模型 | 基於神經網路 | +|------|---------------|-------------|-------------| +| 上下文理解 | ✗ | 有限(N-gram) | ✓(雙向) | +| 概率選擇 | ✗ | ✓ | ✓ | +| 新詞處理 | ✗ | 有限 | ✓ | +| 可解釋性 | ✓ | 有限 | ✗ | +| 效能 | ✓ 極快 | 中等 | 較慢 | +| 維護成本 | 高(詞典) | 中等(語料庫) | 低(自動學習) | + +現代基於神經網路的方法(如 Transformer 模型)可以: +- 理解雙向上下文 +- 學習隱式的語言規律 +- 處理未見過的詞組 +- 減少人工維護詞典的負擔 + +但這些方法也有缺點: +- 計算成本高 +- 需要大量訓練語料 +- 可解釋性差 +- 可能產生非預期的結果 + +### 4. 詞典維護負擔 + +**問題**:隨著語言演變和新詞出現,詞典需要持續更新。 + +**維護挑戰**: + +1. **詞表膨脹** + - `STPhrases.txt` 目前包含 ~60,000+ 詞條 + - 每個錯誤案例可能需要新增多個相關詞條 + - 詞條之間可能產生衝突,需要仔細排序和去重 + +2. **品質控制** + - 需要人工審核每個詞條的正確性 + - 不同地區(大陸、臺灣、香港)的用詞差異需要分別處理 + - 專業領域(醫學、法律、科技)的術語需要專家審核 + +3. 
**社群協作** + - 依賴社群回報問題和提交 PR + - 需要維護者持續投入時間審核和合併 + - 可能產生主觀判斷的分歧 + +**數據規模**: + +| 詞典 | 詞條數量(約) | 用途 | +|------|--------------|------| +| STCharacters.txt | ~6,000 | 簡繁單字對照 | +| STPhrases.txt | ~60,000 | 簡繁詞組對照 | +| TWVariants.txt | ~1,000 | 臺灣異體字 | +| TWPhrases*.txt | ~50,000 | 臺灣地區用詞 | +| HKVariants*.txt | ~5,000 | 香港地區用詞 | + +隨著時間推移,這些數字會持續增長,維護負擔也隨之增加。 + +### 5. 領域適應性問題 + +**問題**:通用詞典難以適應所有專業領域。 + +**範例**: + +- **醫學領域**:`干细胞` → 應為 `幹細胞`(stem cell),而非 `乾細胞` +- **法律領域**:`制定` → 應為 `制定`(legislate),而非 `製定` +- **科技領域**:`接口` → 大陸用語,臺灣可能用 `介面`(interface) + +**解決方案**: + +OpenCC 允許用戶自訂詞典,但這也意味著: +- 每個專業領域都需要維護自己的詞典補充 +- 增加了使用者的配置複雜度 +- 不同領域詞典之間可能產生衝突 + +## 與現代方法的比較 + +### OpenCC 的優勢 + +1. **確定性與可控性** + - 行為完全可預測 + - 可以透過修改詞典精確控制輸出 + - 適合需要一致性的應用場景 + +2. **效能優異** + - 使用 MARISA trie 實現高效查詢 + - 無需 GPU,CPU 即可達到極高吞吐量 + - 適合即時轉換場景 + +3. **輕量級部署** + - 詞典檔案總大小 < 10MB + - 無需額外的模型檔案 + - 可輕鬆整合到瀏覽器(WebAssembly)或移動端 + +4. **開放與可審核** + - 所有詞條都可被審核和修改 + - 社群可以貢獻和驗證 + - 不會產生「黑箱」問題 + +### 何時考慮其他方法 + +如果您的應用場景符合以下條件,可能需要考慮基於統計或深度學習的方法: + +1. **需要處理大量未登錄詞**(新詞、網路用語) +2. **需要理解複雜語境**(如需要跨句理解) +3. **願意犧牲效能換取準確度**(可接受較長的處理時間) +4. **有足夠的訓練語料和計算資源** + +## 總結 + +OpenCC 的設計哲學是:**在可接受的維護成本下,提供確定性、高效能的簡繁轉換**。 + +**理論局限性**: +- ✗ 不理解語境和語義 +- ✗ 不使用概率和語言模型 +- ✗ 一對多問題依賴詞典窮舉 +- ✗ 維護負擔隨詞典增長而增加 + +**實務優勢**: +- ✓ 對常見文本轉換效果優異 +- ✓ 效能極佳,適合生產環境 +- ✓ 行為可預測,易於除錯 +- ✓ 社群驅動,持續改進 + +對於大多數中文簡繁轉換場景,OpenCC 的規則方法已經足夠好用。只有在面對高度專業化或語境複雜的文本時,才需要考慮引入更複雜的語言模型。 + +## 參考資料 + +- OpenCC 原始碼:https://github.com/BYVoid/OpenCC +- MARISA trie:https://github.com/s-yata/marisa-trie +- 相關論文:Chinese Word Segmentation: A Decade Review (Wong et al.) 
+ +--- + +*本文件由 OpenCC 社群維護。如有建議或勘誤,歡迎提交 Issue 或 Pull Request。* diff --git a/node/test.js b/node/test.js index 70d644412..ffc85860e 100644 --- a/node/test.js +++ b/node/test.js @@ -4,85 +4,56 @@ const util = require('util'); const OpenCC = require('./opencc'); -const configs = [ - 'hk2s', - 'hk2t', - 'jp2t', - 's2hk', - 's2t', - 's2tw', - 's2twp', - 't2hk', - 't2jp', - 't2s', - 'tw2s', - 'tw2sp', - 'tw2t', -]; +const cases = JSON.parse(fs.readFileSync('test/testcases/testcases.json', 'utf-8')).cases || []; -const testSync = function (config, done) { - const inputName = 'test/testcases/' + config + '.in'; - const outputName = 'test/testcases/' + config + '.ans'; - const configName = config + '.json'; - const opencc = new OpenCC(configName); - const text = fs.readFileSync(inputName, 'utf-8'); - const converted = opencc.convertSync(text); - const answer = fs.readFileSync(outputName, 'utf-8'); - assert.equal(converted, answer); +const testSync = function (tc, cfg, expected, done) { + const opencc = new OpenCC(cfg + '.json'); + const converted = opencc.convertSync(tc.input); + assert.equal(converted, expected); done(); }; -const testAsync = function (config, done) { - const inputName = 'test/testcases/' + config + '.in'; - const outputName = 'test/testcases/' + config + '.ans'; - const configName = config + '.json'; - const opencc = new OpenCC(configName); - fs.readFile(inputName, 'utf-8', function (err, text) { +const testAsync = function (tc, cfg, expected, done) { + const opencc = new OpenCC(cfg + '.json'); + opencc.convert(tc.input, function (err, converted) { if (err) return done(err); - opencc.convert(text, function (err, converted) { - if (err) return done(err); - fs.readFile(outputName, 'utf-8', function (err, answer) { - if (err) return done(err); - assert.equal(converted, answer); - done(); - }); - }); + assert.equal(converted, expected); + done(); }); }; -async function testAsyncPromise(config) { - const inputName = 'test/testcases/' + config + '.in'; - const 
outputName = 'test/testcases/' + config + '.ans'; - const configName = config + '.json'; - const opencc = new OpenCC(configName); - - const text = await util.promisify(fs.readFile)(inputName, 'utf-8'); - const converted = await opencc.convertPromise(text); - const answer = await util.promisify(fs.readFile)(outputName, 'utf-8'); - - assert.equal(converted, answer); -}; +async function testAsyncPromise(tc, cfg, expected) { + const opencc = new OpenCC(cfg + '.json'); + const converted = await opencc.convertPromise(tc.input); + assert.equal(converted, expected); +} describe('Sync API', function () { - configs.forEach(function (config) { - it(config, function (done) { - testSync(config, done); + cases.forEach(function (tc, idx) { + Object.entries(tc.expected || {}).forEach(function ([cfg, expected]) { + it('[' + cfg + '] case #' + (idx + 1), function (done) { + testSync(tc, cfg, expected, done); + }); }); }); }); describe('Async API', function () { - configs.forEach(function (config) { - it(config, function (done) { - testAsync(config, done); + cases.forEach(function (tc, idx) { + Object.entries(tc.expected || {}).forEach(function ([cfg, expected]) { + it('[' + cfg + '] case #' + (idx + 1), function (done) { + testAsync(tc, cfg, expected, done); + }); }); }); }); describe('Async Promise API', function () { - configs.forEach(function (config) { - it(config, function (done) { - testAsyncPromise(config).then(done); + cases.forEach(function (tc, idx) { + Object.entries(tc.expected || {}).forEach(function ([cfg, expected]) { + it('[' + cfg + '] case #' + (idx + 1), function (done) { + testAsyncPromise(tc, cfg, expected).then(() => done(), done); + }); }); }); }); diff --git a/python/tests/test_opencc.py b/python/tests/test_opencc.py index 31b2b0453..ff34d0556 100644 --- a/python/tests/test_opencc.py +++ b/python/tests/test_opencc.py @@ -1,14 +1,13 @@ from __future__ import unicode_literals +import json import os import pytest import sys -from glob import glob - _this_dir = 
os.path.dirname(os.path.abspath(__file__)) _opencc_rootdir = os.path.abspath(os.path.join(_this_dir, '..', '..')) -_test_assets_dir = os.path.join(_opencc_rootdir, 'test', 'testcases') +_testcases_path = os.path.join(_opencc_rootdir, 'test', 'testcases', 'testcases.json') def test_import(): @@ -26,22 +25,18 @@ def test_init_delete_converter(): def test_conversion(): import opencc - for inpath in glob(os.path.join(_test_assets_dir, '*.in')): - pref = os.path.splitext(inpath)[0] - config = os.path.basename(pref) - converter = opencc.OpenCC(config) - anspath = '{}.{}'.format(pref, 'ans') - assert os.path.isfile(anspath) - - with open(inpath, 'rb') as f: - intexts = [l.strip().decode('utf-8') for l in f] - with open(anspath, 'rb') as f: - anstexts = [l.strip().decode('utf-8') for l in f] - assert len(intexts) == len(anstexts) - - for text, ans in zip(intexts, anstexts): - assert converter.convert(text) == ans, \ - 'Failed to convert {} for {} -> {}'.format(pref, text, ans) + with open(_testcases_path, 'r', encoding='utf-8') as f: + parsed = json.load(f) + + for case in parsed.get('cases', []): + input_text = case.get('input') + expected = case.get('expected', {}) + if not input_text or not isinstance(expected, dict): + continue + for cfg, ans in expected.items(): + converter = opencc.OpenCC(f'{cfg}.json') + assert converter.convert(input_text) == ans, \ + 'Failed to convert {} for {} -> {}'.format(cfg, input_text, ans) if __name__ == "__main__": diff --git a/test/BUILD.bazel b/test/BUILD.bazel index 71a118a7a..c32e46498 100644 --- a/test/BUILD.bazel +++ b/test/BUILD.bazel @@ -22,5 +22,6 @@ cc_test( "//src:common", "@bazel_tools//tools/cpp/runfiles", "@googletest//:gtest_main", + "@rapidjson", ], ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 61ce03346..58acf13b8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,7 @@ include_directories("${PROJECT_SOURCE_DIR}") include_directories("${PROJECT_BINARY_DIR}/src") 
include_directories("${PROJECT_SOURCE_DIR}/src") +include_directories("${PROJECT_SOURCE_DIR}/deps/rapidjson-1.1.0") set(CONFIG_TEST config_test/config_test.json diff --git a/test/CommandLineConvertTest.cpp b/test/CommandLineConvertTest.cpp index 7be8a398a..f53650e33 100644 --- a/test/CommandLineConvertTest.cpp +++ b/test/CommandLineConvertTest.cpp @@ -18,8 +18,13 @@ #include #include +#include +#include +#include +#include #include "src/Common.hpp" +#include "rapidjson/document.h" #include "gtest/gtest.h" #ifdef BAZEL @@ -76,14 +81,6 @@ class CommandLineConvertTest : public ::testing::Test { #endif } - std::string InputDirectory() const { -#ifdef BAZEL - return runfiles_->Rlocation("_main/test/testcases") + "/"; -#else - return CMAKE_SOURCE_DIR "/test/testcases/"; -#endif - } - std::string OutputDirectory() const { #ifdef BAZEL return ::testing::TempDir() + "/"; @@ -92,14 +89,6 @@ class CommandLineConvertTest : public ::testing::Test { #endif } - std::string AnswerDirectory() const { -#ifdef BAZEL - return runfiles_->Rlocation("_main/test/testcases") + "/"; -#else - return CMAKE_SOURCE_DIR "/test/testcases/"; -#endif - } - std::string ConfigurationDirectory() const { #ifdef BAZEL return ""; @@ -109,18 +98,15 @@ class CommandLineConvertTest : public ::testing::Test { } std::string InputFile(const char* config) const { - return InputDirectory() + config + ".in"; + return OutputDirectory() + config + ".in"; } std::string OutputFile(const char* config) const { return OutputDirectory() + config + ".out"; } - std::string AnswerFile(const char* config) const { - return AnswerDirectory() + config + ".ans"; - } - - std::string TestCommand(const char* config, const std::string& inputFile, + std::string TestCommand(const std::string& config, + const std::string& inputFile, const std::string& outputFile) const { std::string cmd = OpenccCommand() + " -i " + inputFile + " -o " + outputFile + " -c " + ConfigurationDirectory() + config + @@ -139,42 +125,94 @@ class 
CommandLineConvertTest : public ::testing::Test { #endif }; -class ConfigurationTest : public CommandLineConvertTest, - public ::testing::WithParamInterface {}; - -TEST_P(ConfigurationTest, Convert) { - const char* config = GetParam(); - const std::string inputFile = InputFile(config); - const std::string outputFile = OutputFile(config); - ASSERT_EQ(0, system(TestCommand(config, inputFile, outputFile).c_str())); - const std::string output = GetFileContents(OutputFile(config)); - const std::string answer = GetFileContents(AnswerFile(config)); - ASSERT_EQ(answer, output); -} +struct CaseInput { + std::string input; + std::string expected; +}; + +using CasesByConfig = std::unordered_map>; + +CasesByConfig LoadCases(const std::string& jsonPath) { + CasesByConfig cases; + std::string content; + { + std::ifstream ifs(jsonPath); + if (!ifs.is_open()) { + throw std::runtime_error("Cannot open " + jsonPath); + } + std::stringstream buffer; + buffer << ifs.rdbuf(); + content = buffer.str(); + } + + rapidjson::Document doc; + doc.Parse(content.c_str()); + if (doc.HasParseError() || !doc.IsObject() || !doc.HasMember("cases") || + !doc["cases"].IsArray()) { + throw std::runtime_error("Invalid testcases.json format"); + } -TEST_P(ConfigurationTest, InPlaceConvert) { - const char* config = GetParam(); - // Copy input to output - const std::string inputFile = InputFile(config); - const std::string outputFile = OutputFile(config); - std::ifstream source(inputFile, std::ios::binary); - std::ofstream dest(outputFile, std::ios::binary); - dest << source.rdbuf(); - source.close(); - dest.close(); - // Test in-place convert (same file) - ASSERT_EQ(0, system(TestCommand(config, outputFile, outputFile).c_str())); - const std::string output = GetFileContents(OutputFile(config)); - const std::string answer = GetFileContents(AnswerFile(config)); - ASSERT_EQ(answer, output); + for (auto& entry : doc["cases"].GetArray()) { + if (!entry.IsObject() || !entry.HasMember("input") || + 
!entry["input"].IsString() || !entry.HasMember("expected") || + !entry["expected"].IsObject()) { + continue; + } + const std::string input = entry["input"].GetString(); + for (auto itr = entry["expected"].MemberBegin(); + itr != entry["expected"].MemberEnd(); ++itr) { + if (!itr->value.IsString()) { + continue; + } + const std::string config = itr->name.GetString(); + cases[config].push_back({input, itr->value.GetString()}); + } + } + return cases; } -INSTANTIATE_TEST_SUITE_P( - CommandLine, ConfigurationTest, - ::testing::Values("hk2s", "hk2t", "jp2t", "s2hk", "s2t", "s2tw", "s2twp", - "t2hk", "t2jp", "t2s", "tw2s", "tw2sp", "tw2t"), - [](const testing::TestParamInfo& info) { - return info.param; - }); +TEST_F(CommandLineConvertTest, ConvertFromJson) { +#ifdef BAZEL + const std::string casesPath = + runfiles_->Rlocation("_main/test/testcases/testcases.json"); +#else + const std::string casesPath = CMAKE_SOURCE_DIR "/test/testcases/testcases.json"; +#endif + const CasesByConfig cases = LoadCases(casesPath); + + for (const auto& entry : cases) { + const std::string& config = entry.first; + const std::string inputFile = InputFile(config.c_str()); + const std::string outputFile = OutputFile(config.c_str()); + + // Write inputs into a temp file (one per line). + { + std::ofstream ofs(inputFile, std::ios::binary); + ASSERT_TRUE(ofs.is_open()) << "Failed to open input file for writing: " + << inputFile; + for (const auto& item : entry.second) { + ofs << item.input << "\n"; + } + } + + ASSERT_EQ(0, system(TestCommand(config, inputFile, outputFile).c_str())); + + // Read outputs and compare line by line. 
+ std::ifstream ifs(outputFile, std::ios::binary); + ASSERT_TRUE(ifs.is_open()); + std::string line; + size_t idx = 0; + while (std::getline(ifs, line)) { + if (!line.empty() && line.back() == '\r') { + line.pop_back(); // normalize Windows CRLF + } + ASSERT_LT(idx, entry.second.size()); + EXPECT_EQ(entry.second[idx].expected, line) + << "config=" << config << " index=" << idx; + idx++; + } + EXPECT_EQ(idx, entry.second.size()) << "config=" << config; + } +} } // namespace opencc diff --git a/test/testcases/BUILD.bazel b/test/testcases/BUILD.bazel index 4c9a63ab5..0a1c3e136 100644 --- a/test/testcases/BUILD.bazel +++ b/test/testcases/BUILD.bazel @@ -2,5 +2,5 @@ package(default_visibility = ["//visibility:public"]) filegroup( name = "testcases", - srcs = glob(["*.in"]) + glob(["*.ans"]), + srcs = ["testcases.json"], ) diff --git a/test/testcases/hk2s.ans b/test/testcases/hk2s.ans deleted file mode 100644 index 8644ddc48..000000000 --- a/test/testcases/hk2s.ans +++ /dev/null @@ -1,3 +0,0 @@ -虚伪叹息 -潮湿灶台 -赞叹沙河涌汹涌的波浪 \ No newline at end of file diff --git a/test/testcases/hk2s.in b/test/testcases/hk2s.in deleted file mode 100644 index 3c4549754..000000000 --- a/test/testcases/hk2s.in +++ /dev/null @@ -1,3 +0,0 @@ -虛偽歎息 -潮濕灶台 -讚歎沙河涌洶湧的波浪 \ No newline at end of file diff --git a/test/testcases/hk2t.ans b/test/testcases/hk2t.ans deleted file mode 100644 index ae75d251c..000000000 --- a/test/testcases/hk2t.ans +++ /dev/null @@ -1,2 +0,0 @@ -爲賦新詞強說愁 -想到自己一緊張就口吃,我就沒胃口喫飯 \ No newline at end of file diff --git a/test/testcases/hk2t.in b/test/testcases/hk2t.in deleted file mode 100644 index 1bbc409c5..000000000 --- a/test/testcases/hk2t.in +++ /dev/null @@ -1,2 +0,0 @@ -為賦新詞強説愁 -想到自己一緊張就口吃,我就沒胃口吃飯 \ No newline at end of file diff --git a/test/testcases/jp2t.ans b/test/testcases/jp2t.ans deleted file mode 100644 index 9cbcdcfc1..000000000 --- a/test/testcases/jp2t.ans +++ /dev/null @@ -1,4 +0,0 @@ -舊字體歷史假名遣 新字體現代假名遣 -橫濱 絲魚川 伊豫國 -驛辨當 辨別 辯護士 瓣膜 -藝術 缺航 欠缺 飲料罐 \ No newline at end of 
file diff --git a/test/testcases/jp2t.in b/test/testcases/jp2t.in deleted file mode 100644 index 9178c8a99..000000000 --- a/test/testcases/jp2t.in +++ /dev/null @@ -1,4 +0,0 @@ -旧字体歴史仮名遣 新字体現代仮名遣 -横浜 糸魚川 伊予国 -駅弁当 弁別 弁護士 弁膜 -芸術 欠航 欠缺 飲料缶 \ No newline at end of file diff --git a/test/testcases/s2hk.ans b/test/testcases/s2hk.ans deleted file mode 100644 index 9a38de5aa..000000000 --- a/test/testcases/s2hk.ans +++ /dev/null @@ -1,4 +0,0 @@ -虛偽嘆息 -潮濕灶台 -讚歎沙河涌洶湧的波浪 -為了核實這説法 \ No newline at end of file diff --git a/test/testcases/s2hk.in b/test/testcases/s2hk.in deleted file mode 100644 index be99830a0..000000000 --- a/test/testcases/s2hk.in +++ /dev/null @@ -1,4 +0,0 @@ -虚伪叹息 -潮湿灶台 -赞叹沙河涌汹涌的波浪 -为了核实这说法 \ No newline at end of file diff --git a/test/testcases/s2t.ans b/test/testcases/s2t.ans deleted file mode 100644 index 25511ae54..000000000 --- a/test/testcases/s2t.ans +++ /dev/null @@ -1,12 +0,0 @@ -誇誇其談 夸父逐日 -我幹什麼不干你事。 -太后的頭髮很乾燥。 -燕燕于飛,差池其羽。之子于歸,遠送於野。 -請成相,世之殃,愚闇愚闇墮賢良。人主無賢,如瞽無相何倀倀!請布基,慎聖人,愚而自專事不治。主忌苟勝,羣臣莫諫必逢災。 -曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。 -新的理論被發現了。 -金胄不是金色的甲冑。 -經理發現後勸諭兩人 -想到自己一緊張就口吃,我就沒胃口喫飯 -恒指最新消息,恒生指數跌破 2 萬點 -恒生銀行和恒大集團發佈財報 \ No newline at end of file diff --git a/test/testcases/s2t.in b/test/testcases/s2t.in deleted file mode 100644 index fe240250d..000000000 --- a/test/testcases/s2t.in +++ /dev/null @@ -1,12 +0,0 @@ -夸夸其谈 夸父逐日 -我干什么不干你事。 -太后的头发很干燥。 -燕燕于飞,差池其羽。之子于归,远送于野。 -请成相,世之殃,愚暗愚暗堕贤良。人主无贤,如瞽无相何伥伥!请布基,慎圣人,愚而自专事不治。主忌苟胜,群臣莫谏必逢灾。 -曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。 -新的理论被发现了。 -金胄不是金色的甲胄。 -经理发现后劝谕两人 -想到自己一紧张就口吃,我就没胃口吃饭 -恒指最新消息,恒生指数跌破 2 万点 -恒生银行和恒大集团发布财报 \ No newline at end of file diff --git a/test/testcases/s2tw.ans b/test/testcases/s2tw.ans deleted file mode 100644 index 3ce62d546..000000000 --- a/test/testcases/s2tw.ans +++ /dev/null @@ -1,2 +0,0 @@ -著裝汙染虛偽發洩稜柱群眾裡面 -鯰魚和鯰魚是一種生物。 \ No newline at end of file diff 
--git a/test/testcases/s2tw.in b/test/testcases/s2tw.in deleted file mode 100644 index 5f9967ba9..000000000 --- a/test/testcases/s2tw.in +++ /dev/null @@ -1,2 +0,0 @@ -着装污染虚伪发泄棱柱群众里面 -鲶鱼和鲇鱼是一种生物。 \ No newline at end of file diff --git a/test/testcases/s2twp.ans b/test/testcases/s2twp.ans deleted file mode 100644 index 9abfa6f26..000000000 --- a/test/testcases/s2twp.ans +++ /dev/null @@ -1,6 +0,0 @@ -滑鼠裡面的矽二極體壞了,導致游標解析度降低。 -我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。 -為什麼你在床裡面睡著? -海內存知己 -摩爾線程 -字節跳動 \ No newline at end of file diff --git a/test/testcases/s2twp.in b/test/testcases/s2twp.in deleted file mode 100644 index d16681a82..000000000 --- a/test/testcases/s2twp.in +++ /dev/null @@ -1,6 +0,0 @@ -鼠标里面的硅二极管坏了,导致光标分辨率降低。 -我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。 -为什么你在床里面睡着? -海内存知己 -摩尔线程 -字节跳动 \ No newline at end of file diff --git a/test/testcases/t2hk.ans b/test/testcases/t2hk.ans deleted file mode 100644 index 704b68785..000000000 --- a/test/testcases/t2hk.ans +++ /dev/null @@ -1,3 +0,0 @@ -潮濕的露台 -為了核實這説法 -包粽子活動告一段落 \ No newline at end of file diff --git a/test/testcases/t2hk.in b/test/testcases/t2hk.in deleted file mode 100644 index 26ac2e71f..000000000 --- a/test/testcases/t2hk.in +++ /dev/null @@ -1,3 +0,0 @@ -潮溼的露臺 -爲了覈實這說法 -包糉子活動告一段落 \ No newline at end of file diff --git a/test/testcases/t2jp.ans b/test/testcases/t2jp.ans deleted file mode 100644 index c5e8a381d..000000000 --- a/test/testcases/t2jp.ans +++ /dev/null @@ -1,4 +0,0 @@ -旧字体歴史仮名遣 新字体現代仮名遣 -横浜 糸魚川 伊予国 -駅弁当 弁別 弁護士 弁膜 -芸術 欠航 飲料缶 \ No newline at end of file diff --git a/test/testcases/t2jp.in b/test/testcases/t2jp.in deleted file mode 100644 index c2626d233..000000000 --- a/test/testcases/t2jp.in +++ /dev/null @@ -1,4 +0,0 @@ -舊字體歷史假名遣 新字體現代假名遣 -橫濱 絲魚川 伊豫國 -驛辨當 辨別 辯護士 瓣膜 -藝術 缺航 飲料罐 \ No newline at end of file diff --git a/test/testcases/t2s.ans b/test/testcases/t2s.ans deleted file mode 100644 index 299716adf..000000000 --- a/test/testcases/t2s.ans +++ /dev/null @@ -1,2 +0,0 @@ 
-曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。 -二𫫇英 \ No newline at end of file diff --git a/test/testcases/t2s.in b/test/testcases/t2s.in deleted file mode 100644 index eef2f346e..000000000 --- a/test/testcases/t2s.in +++ /dev/null @@ -1,2 +0,0 @@ -曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。 -二噁英 \ No newline at end of file diff --git a/test/testcases/testcases.json b/test/testcases/testcases.json new file mode 100755 index 000000000..b40da828a --- /dev/null +++ b/test/testcases/testcases.json @@ -0,0 +1,382 @@ +{ + "cases": [ + { + "id": "case_001", + "input": "虛偽歎息", + "expected": { + "hk2s": "虚伪叹息" + } + }, + { + "id": "case_002", + "input": "潮濕灶台", + "expected": { + "hk2s": "潮湿灶台" + } + }, + { + "id": "case_003", + "input": "讚歎沙河涌洶湧的波浪", + "expected": { + "hk2s": "赞叹沙河涌汹涌的波浪" + } + }, + { + "id": "case_004", + "input": "為賦新詞強説愁", + "expected": { + "hk2t": "爲賦新詞強說愁" + } + }, + { + "id": "case_005", + "input": "想到自己一緊張就口吃,我就沒胃口吃飯", + "expected": { + "hk2t": "想到自己一緊張就口吃,我就沒胃口喫飯", + "tw2t": "想到自己一緊張就口吃,我就沒胃口喫飯" + } + }, + { + "id": "case_006", + "input": "旧字体歴史仮名遣 新字体現代仮名遣", + "expected": { + "jp2t": "舊字體歷史假名遣 新字體現代假名遣" + } + }, + { + "id": "case_007", + "input": "横浜 糸魚川 伊予国", + "expected": { + "jp2t": "橫濱 絲魚川 伊豫國" + } + }, + { + "id": "case_008", + "input": "駅弁当 弁別 弁護士 弁膜", + "expected": { + "jp2t": "驛辨當 辨別 辯護士 瓣膜" + } + }, + { + "id": "case_009", + "input": "芸術 欠航 欠缺 飲料缶", + "expected": { + "jp2t": "藝術 缺航 欠缺 飲料罐" + } + }, + { + "id": "case_010", + "input": "虚伪叹息", + "expected": { + "s2hk": "虛偽嘆息" + } + }, + { + "id": "case_011", + "input": "潮湿灶台", + "expected": { + "s2hk": "潮濕灶台" + } + }, + { + "id": "case_012", + "input": "赞叹沙河涌汹涌的波浪", + "expected": { + "s2hk": "讚歎沙河涌洶湧的波浪" + } + }, + { + "id": "case_013", + "input": "为了核实这说法", + "expected": { + "s2hk": "為了核實這説法" + } + }, + { + "id": "case_014", + "input": "高剂量的苦瓜素还会抑制胚胎发育", + 
"expected": { + "s2hk": "高劑量的苦瓜素還會抑制胚胎發育", + "s2t": "高劑量的苦瓜素還會抑制胚胎發育", + "s2tw": "高劑量的苦瓜素還會抑制胚胎發育", + "s2twp": "高劑量的苦瓜素還會抑制胚胎發育" + } + }, + { + "id": "case_015", + "input": "夸夸其谈 夸父逐日", + "expected": { + "s2t": "誇誇其談 夸父逐日" + } + }, + { + "id": "case_016", + "input": "我干什么不干你事。", + "expected": { + "s2t": "我幹什麼不干你事。" + } + }, + { + "id": "case_017", + "input": "太后的头发很干燥。", + "expected": { + "s2t": "太后的頭髮很乾燥。" + } + }, + { + "id": "case_018", + "input": "燕燕于飞,差池其羽。之子于归,远送于野。", + "expected": { + "s2t": "燕燕于飛,差池其羽。之子于歸,遠送於野。" + } + }, + { + "id": "case_019", + "input": "请成相,世之殃,愚暗愚暗堕贤良。人主无贤,如瞽无相何伥伥!请布基,慎圣人,愚而自专事不治。主忌苟胜,群臣莫谏必逢灾。", + "expected": { + "s2t": "請成相,世之殃,愚闇愚闇墮賢良。人主無賢,如瞽無相何倀倀!請布基,慎聖人,愚而自專事不治。主忌苟勝,羣臣莫諫必逢災。" + } + }, + { + "id": "case_020", + "input": "曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。", + "expected": { + "s2t": "曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。" + } + }, + { + "id": "case_021", + "input": "新的理论被发现了。", + "expected": { + "s2t": "新的理論被發現了。" + } + }, + { + "id": "case_022", + "input": "金胄不是金色的甲胄。", + "expected": { + "s2t": "金胄不是金色的甲冑。" + } + }, + { + "id": "case_023", + "input": "经理发现后劝谕两人", + "expected": { + "s2t": "經理發現後勸諭兩人" + } + }, + { + "id": "case_024", + "input": "想到自己一紧张就口吃,我就没胃口吃饭", + "expected": { + "s2t": "想到自己一緊張就口吃,我就沒胃口喫飯" + } + }, + { + "id": "case_025", + "input": "恒指最新消息,恒生指数跌破 2 万点", + "expected": { + "s2t": "恒指最新消息,恒生指數跌破 2 萬點" + } + }, + { + "id": "case_026", + "input": "恒生银行和恒大集团发布财报", + "expected": { + "s2t": "恒生銀行和恒大集團發佈財報" + } + }, + { + "id": "case_027", + "input": "着装污染虚伪发泄棱柱群众里面", + "expected": { + "s2tw": "著裝汙染虛偽發洩稜柱群眾裡面" + } + }, + { + "id": "case_028", + "input": "鲶鱼和鲇鱼是一种生物。", + "expected": { + "s2tw": "鯰魚和鯰魚是一種生物。" + } + }, + { + "id": "case_029", + "input": "鼠标里面的硅二极管坏了,导致光标分辨率降低。", + "expected": { + "s2twp": "滑鼠裡面的矽二極體壞了,導致游標解析度降低。" + } + }, + { + "id": "case_030", + "input": 
"我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。", + "expected": { + "s2twp": "我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。" + } + }, + { + "id": "case_031", + "input": "为什么你在床里面睡着?", + "expected": { + "s2twp": "為什麼你在床裡面睡著?" + } + }, + { + "id": "case_032", + "input": "海内存知己", + "expected": { + "s2twp": "海內存知己" + } + }, + { + "id": "case_033", + "input": "摩尔线程", + "expected": { + "s2twp": "摩爾線程" + } + }, + { + "id": "case_034", + "input": "字节跳动", + "expected": { + "s2twp": "字節跳動" + } + }, + { + "id": "case_035", + "input": "潮溼的露臺", + "expected": { + "t2hk": "潮濕的露台" + } + }, + { + "id": "case_036", + "input": "爲了覈實這說法", + "expected": { + "t2hk": "為了核實這説法" + } + }, + { + "id": "case_037", + "input": "包糉子活動告一段落", + "expected": { + "t2hk": "包粽子活動告一段落" + } + }, + { + "id": "case_038", + "input": "舊字體歷史假名遣 新字體現代假名遣", + "expected": { + "t2jp": "旧字体歴史仮名遣 新字体現代仮名遣" + } + }, + { + "id": "case_039", + "input": "橫濱 絲魚川 伊豫國", + "expected": { + "t2jp": "横浜 糸魚川 伊予国" + } + }, + { + "id": "case_040", + "input": "驛辨當 辨別 辯護士 瓣膜", + "expected": { + "t2jp": "駅弁当 弁別 弁護士 弁膜" + } + }, + { + "id": "case_041", + "input": "藝術 缺航 飲料罐", + "expected": { + "t2jp": "芸術 欠航 飲料缶" + } + }, + { + "id": "case_042", + "input": "曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。", + "expected": { + "t2s": "曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。" + } + }, + { + "id": "case_043", + "input": "二噁英", + "expected": { + "t2s": "二𫫇英" + } + }, + { + "id": "case_044", + "input": "著裝著作汙染虛偽發洩稜柱群眾裡面", + "expected": { + "tw2s": "着装著作污染虚伪发泄棱柱群众里面" + } + }, + { + "id": "case_045", + "input": "滑鼠裡面的矽二極體壞了,導致游標解析度降低。", + "expected": { + "tw2sp": "鼠标里面的硅二极管坏了,导致光标分辨率降低。" + } + }, + { + "id": "case_046", + "input": "我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。", + "expected": { + "tw2sp": "我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。" + } + }, + { + "id": "case_047", + "input": "為什麼你在床裡面睡著?", + "expected": { + "tw2sp": "为什么你在床里面睡着?" 
+ } + }, + { + "id": "case_048", + "input": "用滑鼠點選正規表示式", + "expected": { + "tw2sp": "用鼠标点击正则表达式" + } + }, + { + "id": "case_049", + "input": "KB大橋也被視為帛琉人的後花園", + "expected": { + "tw2sp": "KB大桥也被视为帕劳人的后花园" + } + }, + { + "id": "case_050", + "input": "這個軟體裡有一套軟體動物的資料庫", + "expected": { + "tw2sp": "这个软件里有一套软体动物的数据库" + } + }, + { + "id": "case_051", + "input": "為了眾人化妝床頭裡面衛生,醞釀群峰鐵鉤嘆氣事件", + "expected": { + "tw2t": "爲了衆人化妝牀頭裏面衛生,醞釀羣峯鐵鉤嘆氣事件" + } + }, + { + "id": "case_052", + "input": "在廚房裡做手擀麵", + "expected": { + "tw2t": "在廚房裏做手擀麪" + } + }, + { + "id": "case_053", + "input": "台湾大学", + "expected": { + "s2t": "臺灣大學", + "s2tw": "台灣大學", + "s2twp": "台灣大學", + "s2hk": "台灣大學" + } + } + ] +} diff --git a/test/testcases/tw2s.ans b/test/testcases/tw2s.ans deleted file mode 100644 index b010f7b8b..000000000 --- a/test/testcases/tw2s.ans +++ /dev/null @@ -1 +0,0 @@ -着装著作污染虚伪发泄棱柱群众里面 \ No newline at end of file diff --git a/test/testcases/tw2s.in b/test/testcases/tw2s.in deleted file mode 100644 index 5595eb5f4..000000000 --- a/test/testcases/tw2s.in +++ /dev/null @@ -1 +0,0 @@ -著裝著作汙染虛偽發洩稜柱群眾裡面 \ No newline at end of file diff --git a/test/testcases/tw2sp.ans b/test/testcases/tw2sp.ans deleted file mode 100644 index 863e0ab8a..000000000 --- a/test/testcases/tw2sp.ans +++ /dev/null @@ -1,5 +0,0 @@ -鼠标里面的硅二极管坏了,导致光标分辨率降低。 -我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。 -为什么你在床里面睡着? -用鼠标点击正则表达式 -KB大桥也被视为帕劳人的后花园 \ No newline at end of file diff --git a/test/testcases/tw2sp.in b/test/testcases/tw2sp.in deleted file mode 100644 index 9187a36cb..000000000 --- a/test/testcases/tw2sp.in +++ /dev/null @@ -1,5 +0,0 @@ -滑鼠裡面的矽二極體壞了,導致游標解析度降低。 -我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。 -為什麼你在床裡面睡著? 
-用滑鼠點選正規表示式 -KB大橋也被視為帛琉人的後花園 \ No newline at end of file diff --git a/test/testcases/tw2t.ans b/test/testcases/tw2t.ans deleted file mode 100644 index aa48668bd..000000000 --- a/test/testcases/tw2t.ans +++ /dev/null @@ -1,3 +0,0 @@ -爲了衆人化妝牀頭裏面衛生,醞釀羣峯鐵鉤嘆氣事件 -在廚房裏做手擀麪 -想到自己一緊張就口吃,我就沒胃口喫飯 \ No newline at end of file diff --git a/test/testcases/tw2t.in b/test/testcases/tw2t.in deleted file mode 100644 index be88d1bf2..000000000 --- a/test/testcases/tw2t.in +++ /dev/null @@ -1,3 +0,0 @@ -為了眾人化妝床頭裡面衛生,醞釀群峰鐵鉤嘆氣事件 -在廚房裡做手擀麵 -想到自己一緊張就口吃,我就沒胃口吃飯 \ No newline at end of file diff --git a/wasm-lib/.gitignore b/wasm-lib/.gitignore new file mode 100644 index 000000000..d14b1f81a --- /dev/null +++ b/wasm-lib/.gitignore @@ -0,0 +1,39 @@ +# Emscripten cache +.emcache + +# Node modules +node_modules/ + +# Build intermediates (not committed) +build/ + +# Dist outputs (keep in git for npm package) + +# Logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# OS files +.DS_Store +Thumbs.db + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +*.swp +*.swo +*~ + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Temporary files +*.tmp +.cache/ diff --git a/wasm-lib/LICENSE b/wasm-lib/LICENSE new file mode 100644 index 000000000..7be6caebd --- /dev/null +++ b/wasm-lib/LICENSE @@ -0,0 +1,176 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS diff --git a/wasm-lib/NOTICE b/wasm-lib/NOTICE new file mode 100644 index 000000000..fcc5324ca --- /dev/null +++ b/wasm-lib/NOTICE @@ -0,0 +1,2 @@ +Modifications and additional code +Copyright © 2025 Frank Lin. diff --git a/wasm-lib/README.md b/wasm-lib/README.md new file mode 100644 index 000000000..fff7a8f7d --- /dev/null +++ b/wasm-lib/README.md @@ -0,0 +1,129 @@ +# opencc-wasm + +This package provides a WebAssembly backend for OpenCC, fully compatible with the `opencc-js` public API. 
It bundles the OpenCC C++ core (plus marisa) compiled via Emscripten, plus the official OpenCC configs and prebuilt `.ocd2` dictionaries (placed under `dist/data/` at build time). +License: Apache-2.0 (see LICENSE). + +## Features +- Same API surface as `opencc-js`: `OpenCC.Converter`, `CustomConverter`, `ConverterFactory`, and locale presets. +- No native bindings required; runs in Node.js and modern browsers (ESM), with a CommonJS build for legacy `require`. +- On-demand loading of configs and dictionaries from the package’s `data/` directory into the Emscripten FS; each config/dict is cached after first use. + +## Installation +```bash +npm install opencc-wasm +``` + +## Usage +```js +import OpenCC from "opencc-wasm"; + +// Convert Traditional Chinese (HK) to Simplified (CN) +const converter = OpenCC.Converter({ from: "hk", to: "cn" }); +console.log(await converter("漢語")); // => 汉语 + +// Custom dictionary +const custom = OpenCC.CustomConverter([ + ["“", "「"], + ["”", "」"], + ["‘", "『"], + ["’", "』"], +]); +console.log(custom("悟空道:“师父又来了。怎么叫做‘水中捞月’?”")); +// => 悟空道:「师父又来了。怎么叫做『水中捞月』?」 (only the listed pairs are replaced; the text itself is not converted) +``` + +### Node (CommonJS) +```js +const OpenCC = require("opencc-wasm").default; +``` + +## Build + +The project uses a two-stage build process with semantic separation: + +### Stage 1: Build WASM (intermediate artifacts) + +```bash +./build.sh +``` + +Compiles OpenCC + marisa-trie to WASM and generates intermediate build artifacts in `build/`: +- `build/opencc-wasm.esm.js` - ESM WASM glue (for tests/development) +- `build/opencc-wasm.cjs` - CJS WASM glue (for tests/development) +- `build/opencc-wasm.wasm` - WASM binary + +**Semantic: `build/` = internal intermediate artifacts, not for publishing** + +### Stage 2: Build API wrappers (publishable dist) + +```bash +node scripts/build-api.js +``` + +Generates publishable distribution in `dist/`: +- Copies WASM artifacts from `build/` to `dist/esm/` and `dist/cjs/` +- Transforms source `index.js` to `dist/esm/index.js` with production 
paths +- Generates `dist/cjs/index.cjs` with CJS-compatible wrapper +- Copies data files to `dist/data/` + +**Semantic: `dist/` = publishable artifacts for npm** + +### Complete build + +```bash +npm run build +``` + +Runs both stages automatically. + +## Testing +```bash +npm test +``` + +Tests import from source `index.js`, which references `build/` artifacts. +This ensures tests validate the actual build output, not stale dist files. + +Runs the upstream OpenCC testcases (converted to JSON) against the WASM build. + +## Project Structure + +``` +wasm-lib/ +├── build/ ← Intermediate WASM artifacts (gitignored, for tests) +│ ├── opencc-wasm.esm.js +│ ├── opencc-wasm.cjs +│ └── opencc-wasm.wasm +├── dist/ ← Publishable distribution (committed to git) +│ ├── esm/ +│ │ ├── index.js +│ │ └── opencc-wasm.js +│ ├── cjs/ +│ │ ├── index.cjs +│ │ └── opencc-wasm.cjs +│ ├── opencc-wasm.wasm +│ └── data/ ← OpenCC config + dict files +├── index.js ← Source API (references build/ for tests) +├── index.d.ts ← TypeScript definitions +└── scripts/ + └── build-api.js ← Transforms build/ → dist/ +``` + +**Invariants:** +- Tests import source (`index.js`) → loads from `build/` +- Published package exports `dist/` only +- `build/` = internal, `dist/` = publishable + +## Notes +- Internally uses persistent OpenCC handles (`opencc_create/convert/destroy`) to avoid reloading configs. +- Dictionaries are written under `/data/dict/` in the virtual FS; configs under `/data/config/`. Paths inside configs are rewritten automatically. +- Memory grows on demand (`ALLOW_MEMORY_GROWTH=1`); no native dependencies needed. +- Performance note: opencc-wasm focuses on fidelity and compatibility (uses official configs and `.ocd2`, matches Node OpenCC output 1:1). Raw throughput can be slower than pure JS implementations like `opencc-js`, but the WASM version guarantees full OpenCC behavior and config coverage. 
#!/usr/bin/env bash
#
# Compiles the OpenCC C++ core plus marisa-trie to WebAssembly with Emscripten.
#
# Outputs (intermediate artifacts, written to build/ next to this script):
#   build/opencc-wasm.esm.js    ES module glue
#   build/opencc-wasm.cjs       CommonJS glue (Node only)
#   build/opencc-wasm.wasm      WASM binary
#   build/opencc-wasm.esm.wasm  copy of the binary under the legacy name
#
# Requires: em++ (Emscripten) on PATH.
set -euo pipefail

# Every path below is relative to this script's directory. Switch to it so the
# build behaves the same no matter where the script is invoked from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

if ! command -v em++ >/dev/null 2>&1; then
  printf 'error: em++ not found on PATH; activate the Emscripten SDK first\n' >&2
  exit 1
fi

# Output directory (intermediate build artifacts).
BUILD_DIR="build"
mkdir -p "${BUILD_DIR}"

# Dedicated emcc cache directory, to avoid permission problems with the
# default location. Assigned and exported separately so a failure is not masked.
EM_CACHE="${PWD}/.emcache"
export EM_CACHE
mkdir -p "${EM_CACHE}"

# OpenCC source tree (the repository root, one level above wasm-lib/).
OPENCC_SRC_DIR=".."

# Minimal set of OpenCC sources required by the WASM build.
OPENCC_SRCS=(
  "${OPENCC_SRC_DIR}/src/BinaryDict.cpp"
  "${OPENCC_SRC_DIR}/src/Config.cpp"
  "${OPENCC_SRC_DIR}/src/Conversion.cpp"
  "${OPENCC_SRC_DIR}/src/ConversionChain.cpp"
  "${OPENCC_SRC_DIR}/src/Converter.cpp"
  "${OPENCC_SRC_DIR}/src/Dict.cpp"
  "${OPENCC_SRC_DIR}/src/DictEntry.cpp"
  "${OPENCC_SRC_DIR}/src/DictGroup.cpp"
  "${OPENCC_SRC_DIR}/src/Lexicon.cpp"
  "${OPENCC_SRC_DIR}/src/MarisaDict.cpp"
  "${OPENCC_SRC_DIR}/src/MaxMatchSegmentation.cpp"
  "${OPENCC_SRC_DIR}/src/SerializedValues.cpp"
  "${OPENCC_SRC_DIR}/src/SimpleConverter.cpp"
  "${OPENCC_SRC_DIR}/src/TextDict.cpp"
  "${OPENCC_SRC_DIR}/src/UTF8StringSlice.cpp"
  "${OPENCC_SRC_DIR}/src/UTF8Util.cpp"
)

# marisa-trie sources.
MARISA_DIR="${OPENCC_SRC_DIR}/deps/marisa-0.2.6"
MARISA_SRCS=(
  "${MARISA_DIR}/lib/marisa/agent.cc"
  "${MARISA_DIR}/lib/marisa/keyset.cc"
  "${MARISA_DIR}/lib/marisa/trie.cc"
  "${MARISA_DIR}/lib/marisa/grimoire/io/reader.cc"
  "${MARISA_DIR}/lib/marisa/grimoire/io/writer.cc"
  "${MARISA_DIR}/lib/marisa/grimoire/io/mapper.cc"
  "${MARISA_DIR}/lib/marisa/grimoire/trie/louds-trie.cc"
  "${MARISA_DIR}/lib/marisa/grimoire/trie/tail.cc"
  "${MARISA_DIR}/lib/marisa/grimoire/vector/bit-vector.cc"
)

# Header search paths.
INCLUDE_FLAGS=(
  "-I${OPENCC_SRC_DIR}/src"
  "-I${MARISA_DIR}/include"
  "-I${MARISA_DIR}/lib"
  "-I${OPENCC_SRC_DIR}/deps/rapidjson-1.1.0"
)

# Compiler / linker options shared by both targets:
#   -DOPENCC_WASM_WITH_OPENCC      enable the real OpenCC logic
#   -s MODULARIZE=1                emit a module factory function
#   -s EXPORT_NAME                 name of that factory function
#   -s EXPORTED_FUNCTIONS          exported C entry points (underscore-prefixed)
#   -s EXPORTED_RUNTIME_METHODS    expose cwrap/FS/ccall to the JS side
#   -O2                            size/performance trade-off
COMMON_FLAGS=(
  -DOPENCC_WASM_WITH_OPENCC
  "${OPENCC_SRCS[@]}"
  "${MARISA_SRCS[@]}"
  src/main.cpp
  "${INCLUDE_FLAGS[@]}"
  -fexceptions
  -sDISABLE_EXCEPTION_CATCHING=0
  -O2
  -s WASM=1
  -s MODULARIZE=1
  -s FORCE_FILESYSTEM=1
  -s ALLOW_MEMORY_GROWTH=1
  -s EXPORT_NAME="createOpenCCWasm"
  -s EXPORTED_FUNCTIONS="['_opencc_create','_opencc_convert','_opencc_destroy']"
  -s EXPORTED_RUNTIME_METHODS="['cwrap','FS','ccall']"
)

# ES module target (browsers / modern bundlers).
em++ \
  "${COMMON_FLAGS[@]}" \
  -s EXPORT_ES6=1 \
  -o "${BUILD_DIR}/opencc-wasm.esm.js"

# CommonJS target (Node.js require).
em++ \
  "${COMMON_FLAGS[@]}" \
  -s EXPORT_ES6=0 \
  -s ENVIRONMENT='node' \
  -o "${BUILD_DIR}/opencc-wasm.cjs"

# Legacy-name compatibility: keep a copy of the binary as opencc-wasm.esm.wasm
# so either glue file can reference it.
# NOTE(review): the ESM link above presumably already wrote
# opencc-wasm.esm.wasm; this overwrites it with the CJS build's binary.
# Confirm the two binaries are interchangeable.
cp -- "${BUILD_DIR}/opencc-wasm.wasm" "${BUILD_DIR}/opencc-wasm.esm.wasm"

# The .wasm files themselves are produced by emcc alongside the glue.
echo "Build complete. Intermediate files in ${BUILD_DIR}/"
echo "  - ${BUILD_DIR}/opencc-wasm.esm.js (ESM glue for tests/rebuild)"
echo "  - ${BUILD_DIR}/opencc-wasm.cjs (CJS glue for tests/rebuild)"
echo "  - ${BUILD_DIR}/opencc-wasm.wasm (WASM binary)"
echo "  - ${BUILD_DIR}/opencc-wasm.esm.wasm (alias, same content as above)"
echo ""
echo "Run 'node scripts/build-api.js' to generate dist/ for publishing."
"HKVariantsRevPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "HKVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "HKVariantsRev.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/config/jp2t.json b/wasm-lib/data/config/jp2t.json new file mode 100644 index 000000000..025d89197 --- /dev/null +++ b/wasm-lib/data/config/jp2t.json @@ -0,0 +1,25 @@ +{ + "name": "New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "JPShinjitaiPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "JPShinjitaiPhrases.ocd2" + }, { + "type": "ocd2", + "file": "JPShinjitaiCharacters.ocd2" + }, { + "type": "ocd2", + "file": "JPVariantsRev.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/config/s2hk.json b/wasm-lib/data/config/s2hk.json new file mode 100644 index 000000000..fcaa017ee --- /dev/null +++ b/wasm-lib/data/config/s2hk.json @@ -0,0 +1,27 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }, { + "dict": { + "type": "ocd2", + "file": "HKVariants.ocd2" + } + }] +} diff --git a/wasm-lib/data/config/s2t.json b/wasm-lib/data/config/s2t.json new file mode 100644 index 000000000..87516acbd --- /dev/null +++ b/wasm-lib/data/config/s2t.json @@ -0,0 +1,22 @@ +{ + "name": "Simplified Chinese to Traditional Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + 
}, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/config/s2tw.json b/wasm-lib/data/config/s2tw.json new file mode 100644 index 000000000..2a3d7656b --- /dev/null +++ b/wasm-lib/data/config/s2tw.json @@ -0,0 +1,27 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Taiwan standard)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }, { + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }] +} diff --git a/wasm-lib/data/config/s2twp.json b/wasm-lib/data/config/s2twp.json new file mode 100644 index 000000000..2f36e9352 --- /dev/null +++ b/wasm-lib/data/config/s2twp.json @@ -0,0 +1,32 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }, { + "dict": { + "type": "ocd2", + "file": "TWPhrases.ocd2" + } + }, { + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }] +} diff --git a/wasm-lib/data/config/t2hk.json b/wasm-lib/data/config/t2hk.json new file mode 100644 index 000000000..519d4a3fd --- /dev/null +++ b/wasm-lib/data/config/t2hk.json @@ -0,0 +1,16 @@ +{ + "name": "Traditional Chinese to Traditional Chinese (Hong Kong variant)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "HKVariants.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "ocd2", + "file": "HKVariants.ocd2" + } + }] +} diff --git a/wasm-lib/data/config/t2jp.json b/wasm-lib/data/config/t2jp.json new file mode 100644 index 
000000000..7a43217ff --- /dev/null +++ b/wasm-lib/data/config/t2jp.json @@ -0,0 +1,16 @@ +{ + "name": "Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "JPVariants.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "ocd2", + "file": "JPVariants.ocd2" + } + }] +} diff --git a/wasm-lib/data/config/t2s.json b/wasm-lib/data/config/t2s.json new file mode 100644 index 000000000..06cf5f58e --- /dev/null +++ b/wasm-lib/data/config/t2s.json @@ -0,0 +1,22 @@ +{ + "name": "Traditional Chinese to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/config/t2tw.json b/wasm-lib/data/config/t2tw.json new file mode 100644 index 000000000..0394f600d --- /dev/null +++ b/wasm-lib/data/config/t2tw.json @@ -0,0 +1,16 @@ +{ + "name": "Traditional Chinese to Traditional Chinese (Taiwan standard)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }] +} diff --git a/wasm-lib/data/config/tw2s.json b/wasm-lib/data/config/tw2s.json new file mode 100644 index 000000000..4f554393e --- /dev/null +++ b/wasm-lib/data/config/tw2s.json @@ -0,0 +1,33 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRev.ocd2" + }] + } + }, { + "dict": { + "type": "group", + 
"dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/config/tw2sp.json b/wasm-lib/data/config/tw2sp.json new file mode 100644 index 000000000..64eb9d977 --- /dev/null +++ b/wasm-lib/data/config/tw2sp.json @@ -0,0 +1,36 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TWPhrasesRev.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRev.ocd2" + }] + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/config/tw2t.json b/wasm-lib/data/config/tw2t.json new file mode 100644 index 000000000..ad5295b65 --- /dev/null +++ b/wasm-lib/data/config/tw2t.json @@ -0,0 +1,22 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Traditional Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRev.ocd2" + }] + } + }] +} diff --git a/wasm-lib/data/dict/HKVariants.ocd2 b/wasm-lib/data/dict/HKVariants.ocd2 new file mode 100644 index 000000000..fa2edde1c Binary files /dev/null and b/wasm-lib/data/dict/HKVariants.ocd2 differ diff --git a/wasm-lib/data/dict/HKVariantsRev.ocd2 b/wasm-lib/data/dict/HKVariantsRev.ocd2 new file mode 100644 index 000000000..abc97a940 Binary files /dev/null and b/wasm-lib/data/dict/HKVariantsRev.ocd2 differ diff --git a/wasm-lib/data/dict/HKVariantsRevPhrases.ocd2 
b/wasm-lib/data/dict/HKVariantsRevPhrases.ocd2 new file mode 100644 index 000000000..848227724 Binary files /dev/null and b/wasm-lib/data/dict/HKVariantsRevPhrases.ocd2 differ diff --git a/wasm-lib/data/dict/JPShinjitaiCharacters.ocd2 b/wasm-lib/data/dict/JPShinjitaiCharacters.ocd2 new file mode 100644 index 000000000..d27801472 Binary files /dev/null and b/wasm-lib/data/dict/JPShinjitaiCharacters.ocd2 differ diff --git a/wasm-lib/data/dict/JPShinjitaiPhrases.ocd2 b/wasm-lib/data/dict/JPShinjitaiPhrases.ocd2 new file mode 100644 index 000000000..e4c823f11 Binary files /dev/null and b/wasm-lib/data/dict/JPShinjitaiPhrases.ocd2 differ diff --git a/wasm-lib/data/dict/JPVariants.ocd2 b/wasm-lib/data/dict/JPVariants.ocd2 new file mode 100644 index 000000000..132be9a0e Binary files /dev/null and b/wasm-lib/data/dict/JPVariants.ocd2 differ diff --git a/wasm-lib/data/dict/JPVariantsRev.ocd2 b/wasm-lib/data/dict/JPVariantsRev.ocd2 new file mode 100644 index 000000000..9c9f87e28 Binary files /dev/null and b/wasm-lib/data/dict/JPVariantsRev.ocd2 differ diff --git a/wasm-lib/data/dict/STCharacters.ocd2 b/wasm-lib/data/dict/STCharacters.ocd2 new file mode 100644 index 000000000..5f355eb25 Binary files /dev/null and b/wasm-lib/data/dict/STCharacters.ocd2 differ diff --git a/wasm-lib/data/dict/STPhrases.ocd2 b/wasm-lib/data/dict/STPhrases.ocd2 new file mode 100644 index 000000000..811eff0cc Binary files /dev/null and b/wasm-lib/data/dict/STPhrases.ocd2 differ diff --git a/wasm-lib/data/dict/TSCharacters.ocd2 b/wasm-lib/data/dict/TSCharacters.ocd2 new file mode 100644 index 000000000..3c0d60a96 Binary files /dev/null and b/wasm-lib/data/dict/TSCharacters.ocd2 differ diff --git a/wasm-lib/data/dict/TSPhrases.ocd2 b/wasm-lib/data/dict/TSPhrases.ocd2 new file mode 100644 index 000000000..0007aa199 Binary files /dev/null and b/wasm-lib/data/dict/TSPhrases.ocd2 differ diff --git a/wasm-lib/data/dict/TWPhrases.ocd2 b/wasm-lib/data/dict/TWPhrases.ocd2 new file mode 100644 index 
000000000..5f9014dbb Binary files /dev/null and b/wasm-lib/data/dict/TWPhrases.ocd2 differ diff --git a/wasm-lib/data/dict/TWPhrasesRev.ocd2 b/wasm-lib/data/dict/TWPhrasesRev.ocd2 new file mode 100644 index 000000000..183e7c442 Binary files /dev/null and b/wasm-lib/data/dict/TWPhrasesRev.ocd2 differ diff --git a/wasm-lib/data/dict/TWVariants.ocd2 b/wasm-lib/data/dict/TWVariants.ocd2 new file mode 100644 index 000000000..8d7439558 Binary files /dev/null and b/wasm-lib/data/dict/TWVariants.ocd2 differ diff --git a/wasm-lib/data/dict/TWVariantsRev.ocd2 b/wasm-lib/data/dict/TWVariantsRev.ocd2 new file mode 100644 index 000000000..2546811c4 Binary files /dev/null and b/wasm-lib/data/dict/TWVariantsRev.ocd2 differ diff --git a/wasm-lib/data/dict/TWVariantsRevPhrases.ocd2 b/wasm-lib/data/dict/TWVariantsRevPhrases.ocd2 new file mode 100644 index 000000000..95f26afcc Binary files /dev/null and b/wasm-lib/data/dict/TWVariantsRevPhrases.ocd2 differ diff --git a/wasm-lib/dist/cjs/index.cjs b/wasm-lib/dist/cjs/index.cjs new file mode 100644 index 000000000..d373f98f1 --- /dev/null +++ b/wasm-lib/dist/cjs/index.cjs @@ -0,0 +1,172 @@ + +const fs = require("node:fs"); +const { fileURLToPath } = require("node:url"); +const { default: fetchFn = fetch } = {}; + +const BASE_URL = new (require("node:url").URL)("../", import.meta.url || "file://" + __filename); + +const readFileText = (url) => fs.readFileSync(fileURLToPath(url), "utf-8"); +const readFileBuffer = (url) => fs.readFileSync(fileURLToPath(url)); + +const CONFIG_MAP = { + cn: { t: "s2t.json", tw: "s2tw.json", hk: "s2hk.json", cn: null }, + tw: { cn: "tw2s.json", t: "tw2t.json", tw: null }, + hk: { cn: "hk2s.json", t: "hk2t.json", hk: null }, + t: { cn: "t2s.json", tw: "t2tw.json", hk: "t2hk.json", jp: "t2jp.json", t: null }, + jp: { t: "jp2t.json" }, +}; + +const loadedConfigs = new Set(); +const loadedDicts = new Set(); +const handles = new Map(); +let modulePromise = null; +let api = null; + +async function getModule() { + 
if (!modulePromise) { + const wasmUrl = new URL("./opencc-wasm.cjs", import.meta.url || "file://" + __filename); + const create = require(wasmUrl); + modulePromise = create(); + } + return modulePromise; +} + +async function getApi() { + const mod = await getModule(); + if (!api) { + api = { + create: mod.cwrap("opencc_create", "number", ["string"]), + convert: mod.cwrap("opencc_convert", "string", ["number", "string"]), + destroy: mod.cwrap("opencc_destroy", null, ["number"]), + }; + } + return { mod, api }; +} + +function collectOcd2Files(node, acc) { + if (!node || typeof node !== "object") return; + if (node.type === "ocd2" && node.file) acc.add(node.file); + if (node.type === "group" && Array.isArray(node.dicts)) { + node.dicts.forEach((d) => collectOcd2Files(d, acc)); + } +} + +async function fetchText(urlObj) { + if (urlObj.protocol === "file:") return readFileText(urlObj); + const resp = await fetch(urlObj.href); + if (!resp.ok) throw new Error("Fetch " + urlObj + " failed: " + resp.status); + return resp.text(); +} +async function fetchBuffer(urlObj) { + if (urlObj.protocol === "file:") return new Uint8Array(readFileBuffer(urlObj)); + const resp = await fetch(urlObj.href); + if (!resp.ok) throw new Error("Fetch " + urlObj + " failed: " + resp.status); + return new Uint8Array(await resp.arrayBuffer()); +} + +async function ensureConfig(configName) { + if (handles.has(configName)) return handles.get(configName); + const { mod, api: apiFns } = await getApi(); + mod.FS.mkdirTree("/data/config"); + mod.FS.mkdirTree("/data/dict"); + const cfgUrl = new URL("../data/config/" + configName, BASE_URL); + const cfgJson = JSON.parse(await fetchText(cfgUrl)); + + const dicts = new Set(); + collectOcd2Files(cfgJson.segmentation?.dict, dicts); + if (Array.isArray(cfgJson.conversion_chain)) { + cfgJson.conversion_chain.forEach((item) => collectOcd2Files(item?.dict, dicts)); + } + for (const file of dicts) { + if (loadedDicts.has(file)) continue; + const dictUrl = new 
URL("../data/dict/" + file, BASE_URL); + const buf = await fetchBuffer(dictUrl); + mod.FS.writeFile("/data/dict/" + file, buf); + loadedDicts.add(file); + } + const patchPaths = (node) => { + if (!node || typeof node !== "object") return; + if (node.type === "ocd2" && node.file) node.file = "/data/dict/" + node.file; + if (node.type === "group" && Array.isArray(node.dicts)) node.dicts.forEach(patchPaths); + }; + patchPaths(cfgJson.segmentation?.dict); + if (Array.isArray(cfgJson.conversion_chain)) { + cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict)); + } + mod.FS.writeFile("/data/config/" + configName, JSON.stringify(cfgJson)); + loadedConfigs.add(configName); + + const handle = apiFns.create("/data/config/" + configName); + if (!handle || handle < 0) throw new Error("opencc_create failed for " + configName); + handles.set(configName, handle); + return handle; +} + +function resolveConfig(from, to) { + const f = (from || "").toLowerCase(); + const t = (to || "").toLowerCase(); + const m = CONFIG_MAP[f]; + if (!m || !(t in m)) throw new Error("Unsupported conversion from '" + from + "' to '" + to + "'"); + return m[t]; +} + +function createConverter({ from, to, config }) { + const configName = config || resolveConfig(from, to); + return async (text) => { + if (configName === null) return text; + const handle = await ensureConfig(configName); + const { api: apiFns } = await getApi(); + return apiFns.convert(handle, text); + }; +} + +function CustomConverter(dictOrString) { + let pairs = []; + if (typeof dictOrString === "string") { + pairs = dictOrString + .split("|") + .map((seg) => seg.trim()) + .filter(Boolean) + .map((seg) => seg.split(/\s+/)) + .filter((arr) => arr.length >= 2) + .map(([a, b]) => [a, b]); + } else if (Array.isArray(dictOrString)) { + pairs = dictOrString; + } + pairs.sort((a, b) => b[0].length - a[0].length); + return (text) => { + let out = text; + for (const [src, dst] of pairs) { + out = out.split(src).join(dst); + } + 
return out; + }; +} + +function ConverterFactory(fromLocale, toLocale, extraDicts = []) { + const conv = createConverter({ from: fromLocale, to: toLocale }); + const extras = extraDicts.map((d) => CustomConverter(d)); + return async (text) => { + let result = await conv(text); + extras.forEach((fn) => { + result = fn(result); + }); + return result; + }; +} + +const OpenCC = { + Converter(opts) { + const fn = createConverter(opts); + return (text) => fn(text); + }, + CustomConverter, + ConverterFactory, + Locale: { + from: { cn: "cn", tw: "t", hk: "hk", jp: "jp", t: "t" }, + to: { cn: "cn", tw: "tw", hk: "hk", jp: "jp", t: "t" }, + }, +}; + +module.exports = OpenCC; +module.exports.default = OpenCC; diff --git a/wasm-lib/dist/cjs/opencc-wasm.cjs b/wasm-lib/dist/cjs/opencc-wasm.cjs new file mode 100644 index 000000000..bcf71e920 --- /dev/null +++ b/wasm-lib/dist/cjs/opencc-wasm.cjs @@ -0,0 +1,2 @@ +async function createOpenCCWasm(moduleArg={}){var moduleRtn;var Module=moduleArg;var ENVIRONMENT_IS_WORKER=false;var ENVIRONMENT_IS_NODE=true;var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var _scriptName;if(typeof __filename!="undefined"){_scriptName=__filename}else{}var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_NODE){var fs=require("fs");scriptDirectory=__dirname+"/";readBinary=filename=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename);return ret};readAsync=async(filename,binary=true)=>{filename=isFileURI(filename)?new URL(filename):filename;var ret=fs.readFileSync(filename,binary?undefined:"utf8");return ret};if(process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else{}var out=console.log.bind(console);var 
err=console.error.bind(console);var wasmBinary;var ABORT=false;var isFileURI=filename=>filename.startsWith("file://");var readyPromiseResolve,readyPromiseReject;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;if(!Module["noFSInit"]&&!FS.initialized)FS.init();TTY.init();wasmExports["__wasm_call_ctors"]();FS.ignorePermissions=false}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){return locateFile("opencc-wasm.wasm")}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary&&!ENVIRONMENT_IS_NODE){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){var imports={env:wasmImports,wasi_snapshot_preview1:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;assignWasmExports(wasmExports);updateMemoryViews();return wasmExports}function receiveInstantiationResult(result){return receiveInstance(result["instance"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class 
ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var noExitRuntime=true;var stackRestore=val=>__emscripten_stack_restore(val);var stackSave=()=>_emscripten_stack_get_current();var UTF8Decoder=globalThis.TextDecoder&&new TextDecoder;var findStringEnd=(heapOrArray,idx,maxBytesToRead,ignoreNul)=>{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead,ignoreNul):"";var ___assert_fail=(condition,filename,line,func)=>abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"]);var exceptionCaught=[];var uncaughtExceptionCount=0;var ___cxa_begin_catch=ptr=>{var info=new ExceptionInfo(ptr);if(!info.get_caught()){info.set_caught(true);uncaughtExceptionCount--}info.set_rethrown(false);exceptionCaught.push(info);___cxa_increment_exception_refcount(ptr);return ___cxa_get_exception_ptr(ptr)};var exceptionLast=0;var ___cxa_end_catch=()=>{_setThrew(0,0);var info=exceptionCaught.pop();___cxa_decrement_exception_refcount(info.excPtr);exceptionLast=0};class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}set_type(type){HEAPU32[this.ptr+4>>2]=type}get_type(){return 
HEAPU32[this.ptr+4>>2]}set_destructor(destructor){HEAPU32[this.ptr+8>>2]=destructor}get_destructor(){return HEAPU32[this.ptr+8>>2]}set_caught(caught){caught=caught?1:0;HEAP8[this.ptr+12]=caught}get_caught(){return HEAP8[this.ptr+12]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13]=rethrown}get_rethrown(){return HEAP8[this.ptr+13]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr}get_adjusted_ptr(){return HEAPU32[this.ptr+16>>2]}}var setTempRet0=val=>__emscripten_tempret_set(val);var findMatchingCatch=args=>{var thrown=exceptionLast;if(!thrown){setTempRet0(0);return 0}var info=new ExceptionInfo(thrown);info.set_adjusted_ptr(thrown);var thrownType=info.get_type();if(!thrownType){setTempRet0(0);return thrown}for(var caughtType of args){if(caughtType===0||caughtType===thrownType){break}var adjusted_ptr_addr=info.ptr+16;if(___cxa_can_catch(caughtType,thrownType,adjusted_ptr_addr)){setTempRet0(caughtType);return thrown}}setTempRet0(thrownType);return thrown};var ___cxa_find_matching_catch_2=()=>findMatchingCatch([]);var ___cxa_find_matching_catch_3=arg0=>findMatchingCatch([arg0]);var ___cxa_rethrow=()=>{var info=exceptionCaught.pop();if(!info){abort("no exception to throw")}var ptr=info.excPtr;if(!info.get_rethrown()){exceptionCaught.push(info);info.set_rethrown(true);info.set_caught(false);uncaughtExceptionCount++}exceptionLast=ptr;throw exceptionLast};var ___cxa_throw=(ptr,type,destructor)=>{var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast};var ___cxa_uncaught_exceptions=()=>uncaughtExceptionCount;var ___resumeException=ptr=>{if(!exceptionLast){exceptionLast=ptr}throw exceptionLast};var syscallGetVarargI=()=>{var ret=HEAP32[+SYSCALLS.varargs>>2];SYSCALLS.varargs+=4;return ret};var syscallGetVarargP=syscallGetVarargI;var 
PATH={isAbs:path=>path.charAt(0)==="/",splitPath:filename=>{var splitPathRe=/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;return splitPathRe.exec(filename).slice(1)},normalizeArray:(parts,allowAboveRoot)=>{var up=0;for(var i=parts.length-1;i>=0;i--){var last=parts[i];if(last==="."){parts.splice(i,1)}else if(last===".."){parts.splice(i,1);up++}else if(up){parts.splice(i,1);up--}}if(allowAboveRoot){for(;up;up--){parts.unshift("..")}}return parts},normalize:path=>{var isAbsolute=PATH.isAbs(path),trailingSlash=path.slice(-1)==="/";path=PATH.normalizeArray(path.split("/").filter(p=>!!p),!isAbsolute).join("/");if(!path&&!isAbsolute){path="."}if(path&&trailingSlash){path+="/"}return(isAbsolute?"/":"")+path},dirname:path=>{var result=PATH.splitPath(path),root=result[0],dir=result[1];if(!root&&!dir){return"."}if(dir){dir=dir.slice(0,-1)}return root+dir},basename:path=>path&&path.match(/([^\/]+|\/)\/*$/)[1],join:(...paths)=>PATH.normalize(paths.join("/")),join2:(l,r)=>PATH.normalize(l+"/"+r)};var initRandomFill=()=>{if(ENVIRONMENT_IS_NODE){var nodeCrypto=require("crypto");return view=>nodeCrypto.randomFillSync(view)}return view=>crypto.getRandomValues(view)};var randomFill=view=>{(randomFill=initRandomFill())(view)};var PATH_FS={resolve:(...args)=>{var resolvedPath="",resolvedAbsolute=false;for(var i=args.length-1;i>=-1&&!resolvedAbsolute;i--){var path=i>=0?args[i]:FS.cwd();if(typeof path!="string"){throw new TypeError("Arguments to path.resolve must be strings")}else if(!path){return""}resolvedPath=path+"/"+resolvedPath;resolvedAbsolute=PATH.isAbs(path)}resolvedPath=PATH.normalizeArray(resolvedPath.split("/").filter(p=>!!p),!resolvedAbsolute).join("/");return(resolvedAbsolute?"/":"")+resolvedPath||"."},relative:(from,to)=>{from=PATH_FS.resolve(from).slice(1);to=PATH_FS.resolve(to).slice(1);function trim(arr){var start=0;for(;start=0;end--){if(arr[end]!=="")break}if(start>end)return[];return arr.slice(start,end-start+1)}var 
fromParts=trim(from.split("/"));var toParts=trim(to.split("/"));var length=Math.min(fromParts.length,toParts.length);var samePartsLength=length;for(var i=0;i{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var intArrayFromString=(stringy,dontAddNull,length)=>{var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array};var FS_stdin_getChar=()=>{if(!FS_stdin_getChar_buffer.length){var result=null;if(ENVIRONMENT_IS_NODE){var BUFSIZE=256;var buf=Buffer.alloc(BUFSIZE);var bytesRead=0;var fd=process.stdin.fd;try{bytesRead=fs.readSync(fd,buf,0,BUFSIZE)}catch(e){if(e.toString().includes("EOF"))bytesRead=0;else throw e}if(bytesRead>0){result=buf.slice(0,bytesRead).toString("utf-8")}}else{}if(!result){return null}FS_stdin_getChar_buffer=intArrayFromString(result,true)}return FS_stdin_getChar_buffer.shift()};var TTY={ttys:[],init(){},shutdown(){},register(dev,ops){TTY.ttys[dev]={input:[],output:[],ops};FS.registerDevice(dev,TTY.stream_ops)},stream_ops:{open(stream){var tty=TTY.ttys[stream.node.rdev];if(!tty){throw new 
FS.ErrnoError(43)}stream.tty=tty;stream.seekable=false},close(stream){stream.tty.ops.fsync(stream.tty)},fsync(stream){stream.tty.ops.fsync(stream.tty)},read(stream,buffer,offset,length,pos){if(!stream.tty||!stream.tty.ops.get_char){throw new FS.ErrnoError(60)}var bytesRead=0;for(var i=0;i0){out(UTF8ArrayToString(tty.output));tty.output=[]}},ioctl_tcgets(tty){return{c_iflag:25856,c_oflag:5,c_cflag:191,c_lflag:35387,c_cc:[3,28,127,21,4,0,1,0,17,19,26,0,18,15,23,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}},ioctl_tcsets(tty,optional_actions,data){return 0},ioctl_tiocgwinsz(tty){return[24,80]}},default_tty1_ops:{put_char(tty,val){if(val===null||val===10){err(UTF8ArrayToString(tty.output));tty.output=[]}else{if(val!=0)tty.output.push(val)}},fsync(tty){if(tty.output?.length>0){err(UTF8ArrayToString(tty.output));tty.output=[]}}}};var zeroMemory=(ptr,size)=>HEAPU8.fill(0,ptr,ptr+size);var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var mmapAlloc=size=>{size=alignMemory(size,65536);var ptr=_emscripten_builtin_memalign(65536,size);if(ptr)zeroMemory(ptr,size);return ptr};var MEMFS={ops_table:null,mount(mount){return MEMFS.createNode(null,"/",16895,0)},createNode(parent,name,mode,dev){if(FS.isBlkdev(mode)||FS.isFIFO(mode)){throw new 
FS.ErrnoError(63)}MEMFS.ops_table||={dir:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,lookup:MEMFS.node_ops.lookup,mknod:MEMFS.node_ops.mknod,rename:MEMFS.node_ops.rename,unlink:MEMFS.node_ops.unlink,rmdir:MEMFS.node_ops.rmdir,readdir:MEMFS.node_ops.readdir,symlink:MEMFS.node_ops.symlink},stream:{llseek:MEMFS.stream_ops.llseek}},file:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:{llseek:MEMFS.stream_ops.llseek,read:MEMFS.stream_ops.read,write:MEMFS.stream_ops.write,mmap:MEMFS.stream_ops.mmap,msync:MEMFS.stream_ops.msync}},link:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,readlink:MEMFS.node_ops.readlink},stream:{}},chrdev:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:FS.chrdev_stream_ops}};var node=FS.createNode(parent,name,mode,dev);if(FS.isDir(node.mode)){node.node_ops=MEMFS.ops_table.dir.node;node.stream_ops=MEMFS.ops_table.dir.stream;node.contents={}}else if(FS.isFile(node.mode)){node.node_ops=MEMFS.ops_table.file.node;node.stream_ops=MEMFS.ops_table.file.stream;node.usedBytes=0;node.contents=null}else if(FS.isLink(node.mode)){node.node_ops=MEMFS.ops_table.link.node;node.stream_ops=MEMFS.ops_table.link.stream}else if(FS.isChrdev(node.mode)){node.node_ops=MEMFS.ops_table.chrdev.node;node.stream_ops=MEMFS.ops_table.chrdev.stream}node.atime=node.mtime=node.ctime=Date.now();if(parent){parent.contents[name]=node;parent.atime=parent.mtime=parent.ctime=node.atime}return node},getFileDataAsTypedArray(node){if(!node.contents)return new Uint8Array(0);if(node.contents.subarray)return node.contents.subarray(0,node.usedBytes);return new Uint8Array(node.contents)},expandFileStorage(node,newCapacity){var prevCapacity=node.contents?node.contents.length:0;if(prevCapacity>=newCapacity)return;var CAPACITY_DOUBLING_MAX=1024*1024;newCapacity=Math.max(newCapacity,prevCapacity*(prevCapacity>>0);if(prevCapacity!=0)newCapacity=Math.max(newCapacity,256);var 
oldContents=node.contents;node.contents=new Uint8Array(newCapacity);if(node.usedBytes>0)node.contents.set(oldContents.subarray(0,node.usedBytes),0)},resizeFileStorage(node,newSize){if(node.usedBytes==newSize)return;if(newSize==0){node.contents=null;node.usedBytes=0}else{var oldContents=node.contents;node.contents=new Uint8Array(newSize);if(oldContents){node.contents.set(oldContents.subarray(0,Math.min(newSize,node.usedBytes)))}node.usedBytes=newSize}},node_ops:{getattr(node){var attr={};attr.dev=FS.isChrdev(node.mode)?node.id:1;attr.ino=node.id;attr.mode=node.mode;attr.nlink=1;attr.uid=0;attr.gid=0;attr.rdev=node.rdev;if(FS.isDir(node.mode)){attr.size=4096}else if(FS.isFile(node.mode)){attr.size=node.usedBytes}else if(FS.isLink(node.mode)){attr.size=node.link.length}else{attr.size=0}attr.atime=new Date(node.atime);attr.mtime=new Date(node.mtime);attr.ctime=new Date(node.ctime);attr.blksize=4096;attr.blocks=Math.ceil(attr.size/attr.blksize);return attr},setattr(node,attr){for(const key of["mode","atime","mtime","ctime"]){if(attr[key]!=null){node[key]=attr[key]}}if(attr.size!==undefined){MEMFS.resizeFileStorage(node,attr.size)}},lookup(parent,name){if(!MEMFS.doesNotExistError){MEMFS.doesNotExistError=new FS.ErrnoError(44);MEMFS.doesNotExistError.stack=""}throw MEMFS.doesNotExistError},mknod(parent,name,mode,dev){return MEMFS.createNode(parent,name,mode,dev)},rename(old_node,new_dir,new_name){var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(new_node){if(FS.isDir(old_node.mode)){for(var i in new_node.contents){throw new FS.ErrnoError(55)}}FS.hashRemoveNode(new_node)}delete old_node.parent.contents[old_node.name];new_dir.contents[new_name]=old_node;old_node.name=new_name;new_dir.ctime=new_dir.mtime=old_node.parent.ctime=old_node.parent.mtime=Date.now()},unlink(parent,name){delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},rmdir(parent,name){var node=FS.lookupNode(parent,name);for(var i in node.contents){throw new 
FS.ErrnoError(55)}delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},readdir(node){return[".","..",...Object.keys(node.contents)]},symlink(parent,newname,oldpath){var node=MEMFS.createNode(parent,newname,511|40960,0);node.link=oldpath;return node},readlink(node){if(!FS.isLink(node.mode)){throw new FS.ErrnoError(28)}return node.link}},stream_ops:{read(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=stream.node.usedBytes)return 0;var size=Math.min(stream.node.usedBytes-position,length);if(size>8&&contents.subarray){buffer.set(contents.subarray(position,position+size),offset)}else{for(var i=0;i0||position+length{var flagModes={r:0,"r+":2,w:512|64|1,"w+":512|64|2,a:1024|64|1,"a+":1024|64|2};var flags=flagModes[str];if(typeof flags=="undefined"){throw new Error(`Unknown file open mode: ${str}`)}return flags};var FS_getMode=(canRead,canWrite)=>{var mode=0;if(canRead)mode|=292|73;if(canWrite)mode|=146;return mode};var asyncLoad=async url=>{var arrayBuffer=await readAsync(url);return new Uint8Array(arrayBuffer)};var FS_createDataFile=(...args)=>FS.createDataFile(...args);var getUniqueRunDependency=id=>id;var runDependencies=0;var dependenciesFulfilled=null;var removeRunDependency=id=>{runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}};var addRunDependency=id=>{runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)};var preloadPlugins=[];var FS_handledByPreloadPlugin=async(byteArray,fullname)=>{if(typeof Browser!="undefined")Browser.init();for(var plugin of preloadPlugins){if(plugin["canHandle"](fullname)){return plugin["handle"](byteArray,fullname)}}return byteArray};var FS_preloadFile=async(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish)=>{var fullname=name?PATH_FS.resolve(PATH.join2(parent,name)):parent;var dep=getUniqueRunDependency(`cp 
${fullname}`);addRunDependency(dep);try{var byteArray=url;if(typeof url=="string"){byteArray=await asyncLoad(url)}byteArray=await FS_handledByPreloadPlugin(byteArray,fullname);preFinish?.();if(!dontCreateFile){FS_createDataFile(parent,name,byteArray,canRead,canWrite,canOwn)}}finally{removeRunDependency(dep)}};var FS_createPreloadedFile=(parent,name,url,canRead,canWrite,onload,onerror,dontCreateFile,canOwn,preFinish)=>{FS_preloadFile(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish).then(onload).catch(onerror)};var FS={root:null,mounts:[],devices:{},streams:[],nextInode:1,nameTable:null,currentPath:"/",initialized:false,ignorePermissions:true,filesystems:null,syncFSRequests:0,readFiles:{},ErrnoError:class{name="ErrnoError";constructor(errno){this.errno=errno}},FSStream:class{shared={};get object(){return this.node}set object(val){this.node=val}get isRead(){return(this.flags&2097155)!==1}get isWrite(){return(this.flags&2097155)!==0}get isAppend(){return this.flags&1024}get flags(){return this.shared.flags}set flags(val){this.shared.flags=val}get position(){return this.shared.position}set position(val){this.shared.position=val}},FSNode:class{node_ops={};stream_ops={};readMode=292|73;writeMode=146;mounted=null;constructor(parent,name,mode,rdev){if(!parent){parent=this}this.parent=parent;this.mount=parent.mount;this.id=FS.nextInode++;this.name=name;this.mode=mode;this.rdev=rdev;this.atime=this.mtime=this.ctime=Date.now()}get read(){return(this.mode&this.readMode)===this.readMode}set read(val){val?this.mode|=this.readMode:this.mode&=~this.readMode}get write(){return(this.mode&this.writeMode)===this.writeMode}set write(val){val?this.mode|=this.writeMode:this.mode&=~this.writeMode}get isFolder(){return FS.isDir(this.mode)}get isDevice(){return FS.isChrdev(this.mode)}},lookupPath(path,opts={}){if(!path){throw new FS.ErrnoError(44)}opts.follow_mount??=true;if(!PATH.isAbs(path)){path=FS.cwd()+"/"+path}linkloop:for(var nlinks=0;nlinks<40;nlinks++){var 
parts=path.split("/").filter(p=>!!p);var current=FS.root;var current_path="/";for(var i=0;i>>0)%FS.nameTable.length},hashAddNode(node){var hash=FS.hashName(node.parent.id,node.name);node.name_next=FS.nameTable[hash];FS.nameTable[hash]=node},hashRemoveNode(node){var hash=FS.hashName(node.parent.id,node.name);if(FS.nameTable[hash]===node){FS.nameTable[hash]=node.name_next}else{var current=FS.nameTable[hash];while(current){if(current.name_next===node){current.name_next=node.name_next;break}current=current.name_next}}},lookupNode(parent,name){var errCode=FS.mayLookup(parent);if(errCode){throw new FS.ErrnoError(errCode)}var hash=FS.hashName(parent.id,name);for(var node=FS.nameTable[hash];node;node=node.name_next){var nodeName=node.name;if(node.parent.id===parent.id&&nodeName===name){return node}}return FS.lookup(parent,name)},createNode(parent,name,mode,rdev){var node=new FS.FSNode(parent,name,mode,rdev);FS.hashAddNode(node);return node},destroyNode(node){FS.hashRemoveNode(node)},isRoot(node){return node===node.parent},isMountpoint(node){return!!node.mounted},isFile(mode){return(mode&61440)===32768},isDir(mode){return(mode&61440)===16384},isLink(mode){return(mode&61440)===40960},isChrdev(mode){return(mode&61440)===8192},isBlkdev(mode){return(mode&61440)===24576},isFIFO(mode){return(mode&61440)===4096},isSocket(mode){return(mode&49152)===49152},flagsToPermissionString(flag){var perms=["r","w","rw"][flag&3];if(flag&512){perms+="w"}return perms},nodePermissions(node,perms){if(FS.ignorePermissions){return 0}if(perms.includes("r")&&!(node.mode&292)){return 2}else if(perms.includes("w")&&!(node.mode&146)){return 2}else if(perms.includes("x")&&!(node.mode&73)){return 2}return 0},mayLookup(dir){if(!FS.isDir(dir.mode))return 54;var errCode=FS.nodePermissions(dir,"x");if(errCode)return errCode;if(!dir.node_ops.lookup)return 2;return 0},mayCreate(dir,name){if(!FS.isDir(dir.mode)){return 54}try{var node=FS.lookupNode(dir,name);return 20}catch(e){}return 
FS.nodePermissions(dir,"wx")},mayDelete(dir,name,isdir){var node;try{node=FS.lookupNode(dir,name)}catch(e){return e.errno}var errCode=FS.nodePermissions(dir,"wx");if(errCode){return errCode}if(isdir){if(!FS.isDir(node.mode)){return 54}if(FS.isRoot(node)||FS.getPath(node)===FS.cwd()){return 10}}else{if(FS.isDir(node.mode)){return 31}}return 0},mayOpen(node,flags){if(!node){return 44}if(FS.isLink(node.mode)){return 32}else if(FS.isDir(node.mode)){if(FS.flagsToPermissionString(flags)!=="r"||flags&(512|64)){return 31}}return FS.nodePermissions(node,FS.flagsToPermissionString(flags))},checkOpExists(op,err){if(!op){throw new FS.ErrnoError(err)}return op},MAX_OPEN_FDS:4096,nextfd(){for(var fd=0;fd<=FS.MAX_OPEN_FDS;fd++){if(!FS.streams[fd]){return fd}}throw new FS.ErrnoError(33)},getStreamChecked(fd){var stream=FS.getStream(fd);if(!stream){throw new FS.ErrnoError(8)}return stream},getStream:fd=>FS.streams[fd],createStream(stream,fd=-1){stream=Object.assign(new FS.FSStream,stream);if(fd==-1){fd=FS.nextfd()}stream.fd=fd;FS.streams[fd]=stream;return stream},closeStream(fd){FS.streams[fd]=null},dupStream(origStream,fd=-1){var stream=FS.createStream(origStream,fd);stream.stream_ops?.dup?.(stream);return stream},doSetAttr(stream,node,attr){var setattr=stream?.stream_ops.setattr;var arg=setattr?stream:node;setattr??=node.node_ops.setattr;FS.checkOpExists(setattr,63);setattr(arg,attr)},chrdev_stream_ops:{open(stream){var device=FS.getDevice(stream.node.rdev);stream.stream_ops=device.stream_ops;stream.stream_ops.open?.(stream)},llseek(){throw new FS.ErrnoError(70)}},major:dev=>dev>>8,minor:dev=>dev&255,makedev:(ma,mi)=>ma<<8|mi,registerDevice(dev,ops){FS.devices[dev]={stream_ops:ops}},getDevice:dev=>FS.devices[dev],getMounts(mount){var mounts=[];var check=[mount];while(check.length){var m=check.pop();mounts.push(m);check.push(...m.mounts)}return mounts},syncfs(populate,callback){if(typeof 
populate=="function"){callback=populate;populate=false}FS.syncFSRequests++;if(FS.syncFSRequests>1){err(`warning: ${FS.syncFSRequests} FS.syncfs operations in flight at once, probably just doing extra work`)}var mounts=FS.getMounts(FS.root.mount);var completed=0;function doCallback(errCode){FS.syncFSRequests--;return callback(errCode)}function done(errCode){if(errCode){if(!done.errored){done.errored=true;return doCallback(errCode)}return}if(++completed>=mounts.length){doCallback(null)}}for(var mount of mounts){if(mount.type.syncfs){mount.type.syncfs(mount,populate,done)}else{done(null)}}},mount(type,opts,mountpoint){var root=mountpoint==="/";var pseudo=!mountpoint;var node;if(root&&FS.root){throw new FS.ErrnoError(10)}else if(!root&&!pseudo){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});mountpoint=lookup.path;node=lookup.node;if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}if(!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}}var mount={type,opts,mountpoint,mounts:[]};var mountRoot=type.mount(mount);mountRoot.mount=mount;mount.root=mountRoot;if(root){FS.root=mountRoot}else if(node){node.mounted=mount;if(node.mount){node.mount.mounts.push(mount)}}return mountRoot},unmount(mountpoint){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});if(!FS.isMountpoint(lookup.node)){throw new FS.ErrnoError(28)}var node=lookup.node;var mount=node.mounted;var mounts=FS.getMounts(mount);for(var[hash,current]of Object.entries(FS.nameTable)){while(current){var next=current.name_next;if(mounts.includes(current.mount)){FS.destroyNode(current)}current=next}}node.mounted=null;var idx=node.mount.mounts.indexOf(mount);node.mount.mounts.splice(idx,1)},lookup(parent,name){return parent.node_ops.lookup(parent,name)},mknod(path,mode,dev){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);if(!name){throw new FS.ErrnoError(28)}if(name==="."||name===".."){throw new FS.ErrnoError(20)}var 
errCode=FS.mayCreate(parent,name);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.mknod){throw new FS.ErrnoError(63)}return parent.node_ops.mknod(parent,name,mode,dev)},statfs(path){return FS.statfsNode(FS.lookupPath(path,{follow:true}).node)},statfsStream(stream){return FS.statfsNode(stream.node)},statfsNode(node){var rtn={bsize:4096,frsize:4096,blocks:1e6,bfree:5e5,bavail:5e5,files:FS.nextInode,ffree:FS.nextInode-1,fsid:42,flags:2,namelen:255};if(node.node_ops.statfs){Object.assign(rtn,node.node_ops.statfs(node.mount.opts.root))}return rtn},create(path,mode=438){mode&=4095;mode|=32768;return FS.mknod(path,mode,0)},mkdir(path,mode=511){mode&=511|512;mode|=16384;return FS.mknod(path,mode,0)},mkdirTree(path,mode){var dirs=path.split("/");var d="";for(var dir of dirs){if(!dir)continue;if(d||PATH.isAbs(path))d+="/";d+=dir;try{FS.mkdir(d,mode)}catch(e){if(e.errno!=20)throw e}}},mkdev(path,mode,dev){if(typeof dev=="undefined"){dev=mode;mode=438}mode|=8192;return FS.mknod(path,mode,dev)},symlink(oldpath,newpath){if(!PATH_FS.resolve(oldpath)){throw new FS.ErrnoError(44)}var lookup=FS.lookupPath(newpath,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var newname=PATH.basename(newpath);var errCode=FS.mayCreate(parent,newname);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.symlink){throw new FS.ErrnoError(63)}return parent.node_ops.symlink(parent,newname,oldpath)},rename(old_path,new_path){var old_dirname=PATH.dirname(old_path);var new_dirname=PATH.dirname(new_path);var old_name=PATH.basename(old_path);var new_name=PATH.basename(new_path);var lookup,old_dir,new_dir;lookup=FS.lookupPath(old_path,{parent:true});old_dir=lookup.node;lookup=FS.lookupPath(new_path,{parent:true});new_dir=lookup.node;if(!old_dir||!new_dir)throw new FS.ErrnoError(44);if(old_dir.mount!==new_dir.mount){throw new FS.ErrnoError(75)}var old_node=FS.lookupNode(old_dir,old_name);var 
relative=PATH_FS.relative(old_path,new_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(28)}relative=PATH_FS.relative(new_path,old_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(55)}var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(old_node===new_node){return}var isdir=FS.isDir(old_node.mode);var errCode=FS.mayDelete(old_dir,old_name,isdir);if(errCode){throw new FS.ErrnoError(errCode)}errCode=new_node?FS.mayDelete(new_dir,new_name,isdir):FS.mayCreate(new_dir,new_name);if(errCode){throw new FS.ErrnoError(errCode)}if(!old_dir.node_ops.rename){throw new FS.ErrnoError(63)}if(FS.isMountpoint(old_node)||new_node&&FS.isMountpoint(new_node)){throw new FS.ErrnoError(10)}if(new_dir!==old_dir){errCode=FS.nodePermissions(old_dir,"w");if(errCode){throw new FS.ErrnoError(errCode)}}FS.hashRemoveNode(old_node);try{old_dir.node_ops.rename(old_node,new_dir,new_name);old_node.parent=new_dir}catch(e){throw e}finally{FS.hashAddNode(old_node)}},rmdir(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,true);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.rmdir){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}parent.node_ops.rmdir(parent,name);FS.destroyNode(node)},readdir(path){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var readdir=FS.checkOpExists(node.node_ops.readdir,54);return readdir(node)},unlink(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,false);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.unlink){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new 
FS.ErrnoError(10)}parent.node_ops.unlink(parent,name);FS.destroyNode(node)},readlink(path){var lookup=FS.lookupPath(path);var link=lookup.node;if(!link){throw new FS.ErrnoError(44)}if(!link.node_ops.readlink){throw new FS.ErrnoError(28)}return link.node_ops.readlink(link)},stat(path,dontFollow){var lookup=FS.lookupPath(path,{follow:!dontFollow});var node=lookup.node;var getattr=FS.checkOpExists(node.node_ops.getattr,63);return getattr(node)},fstat(fd){var stream=FS.getStreamChecked(fd);var node=stream.node;var getattr=stream.stream_ops.getattr;var arg=getattr?stream:node;getattr??=node.node_ops.getattr;FS.checkOpExists(getattr,63);return getattr(arg)},lstat(path){return FS.stat(path,true)},doChmod(stream,node,mode,dontFollow){FS.doSetAttr(stream,node,{mode:mode&4095|node.mode&~4095,ctime:Date.now(),dontFollow})},chmod(path,mode,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChmod(null,node,mode,dontFollow)},lchmod(path,mode){FS.chmod(path,mode,true)},fchmod(fd,mode){var stream=FS.getStreamChecked(fd);FS.doChmod(stream,stream.node,mode,false)},doChown(stream,node,dontFollow){FS.doSetAttr(stream,node,{timestamp:Date.now(),dontFollow})},chown(path,uid,gid,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChown(null,node,dontFollow)},lchown(path,uid,gid){FS.chown(path,uid,gid,true)},fchown(fd,uid,gid){var stream=FS.getStreamChecked(fd);FS.doChown(stream,stream.node,false)},doTruncate(stream,node,len){if(FS.isDir(node.mode)){throw new FS.ErrnoError(31)}if(!FS.isFile(node.mode)){throw new FS.ErrnoError(28)}var errCode=FS.nodePermissions(node,"w");if(errCode){throw new FS.ErrnoError(errCode)}FS.doSetAttr(stream,node,{size:len,timestamp:Date.now()})},truncate(path,len){if(len<0){throw new FS.ErrnoError(28)}var node;if(typeof path=="string"){var 
lookup=FS.lookupPath(path,{follow:true});node=lookup.node}else{node=path}FS.doTruncate(null,node,len)},ftruncate(fd,len){var stream=FS.getStreamChecked(fd);if(len<0||(stream.flags&2097155)===0){throw new FS.ErrnoError(28)}FS.doTruncate(stream,stream.node,len)},utime(path,atime,mtime){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var setattr=FS.checkOpExists(node.node_ops.setattr,63);setattr(node,{atime,mtime})},open(path,flags,mode=438){if(path===""){throw new FS.ErrnoError(44)}flags=typeof flags=="string"?FS_modeStringToFlags(flags):flags;if(flags&64){mode=mode&4095|32768}else{mode=0}var node;var isDirPath;if(typeof path=="object"){node=path}else{isDirPath=path.endsWith("/");var lookup=FS.lookupPath(path,{follow:!(flags&131072),noent_okay:true});node=lookup.node;path=lookup.path}var created=false;if(flags&64){if(node){if(flags&128){throw new FS.ErrnoError(20)}}else if(isDirPath){throw new FS.ErrnoError(31)}else{node=FS.mknod(path,mode|511,0);created=true}}if(!node){throw new FS.ErrnoError(44)}if(FS.isChrdev(node.mode)){flags&=~512}if(flags&65536&&!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}if(!created){var errCode=FS.mayOpen(node,flags);if(errCode){throw new FS.ErrnoError(errCode)}}if(flags&512&&!created){FS.truncate(node,0)}flags&=~(128|512|131072);var stream=FS.createStream({node,path:FS.getPath(node),flags,seekable:true,position:0,stream_ops:node.stream_ops,ungotten:[],error:false});if(stream.stream_ops.open){stream.stream_ops.open(stream)}if(created){FS.chmod(node,mode&511)}if(Module["logReadFiles"]&&!(flags&1)){if(!(path in FS.readFiles)){FS.readFiles[path]=1}}return stream},close(stream){if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if(stream.getdents)stream.getdents=null;try{if(stream.stream_ops.close){stream.stream_ops.close(stream)}}catch(e){throw e}finally{FS.closeStream(stream.fd)}stream.fd=null},isClosed(stream){return stream.fd===null},llseek(stream,offset,whence){if(FS.isClosed(stream)){throw new 
FS.ErrnoError(8)}if(!stream.seekable||!stream.stream_ops.llseek){throw new FS.ErrnoError(70)}if(whence!=0&&whence!=1&&whence!=2){throw new FS.ErrnoError(28)}stream.position=stream.stream_ops.llseek(stream,offset,whence);stream.ungotten=[];return stream.position},read(stream,buffer,offset,length,position){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.read){throw new FS.ErrnoError(28)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesRead=stream.stream_ops.read(stream,buffer,offset,length,position);if(!seeking)stream.position+=bytesRead;return bytesRead},write(stream,buffer,offset,length,position,canOwn){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===0){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.write){throw new FS.ErrnoError(28)}if(stream.seekable&&stream.flags&1024){FS.llseek(stream,0,2)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesWritten=stream.stream_ops.write(stream,buffer,offset,length,position,canOwn);if(!seeking)stream.position+=bytesWritten;return bytesWritten},mmap(stream,length,position,prot,flags){if((prot&2)!==0&&(flags&2)===0&&(stream.flags&2097155)!==2){throw new FS.ErrnoError(2)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(2)}if(!stream.stream_ops.mmap){throw new FS.ErrnoError(43)}if(!length){throw new FS.ErrnoError(28)}return stream.stream_ops.mmap(stream,length,position,prot,flags)},msync(stream,buffer,offset,length,mmapFlags){if(!stream.stream_ops.msync){return 0}return 
stream.stream_ops.msync(stream,buffer,offset,length,mmapFlags)},ioctl(stream,cmd,arg){if(!stream.stream_ops.ioctl){throw new FS.ErrnoError(59)}return stream.stream_ops.ioctl(stream,cmd,arg)},readFile(path,opts={}){opts.flags=opts.flags||0;opts.encoding=opts.encoding||"binary";if(opts.encoding!=="utf8"&&opts.encoding!=="binary"){abort(`Invalid encoding type "${opts.encoding}"`)}var stream=FS.open(path,opts.flags);var stat=FS.stat(path);var length=stat.size;var buf=new Uint8Array(length);FS.read(stream,buf,0,length,0);if(opts.encoding==="utf8"){buf=UTF8ArrayToString(buf)}FS.close(stream);return buf},writeFile(path,data,opts={}){opts.flags=opts.flags||577;var stream=FS.open(path,opts.flags,opts.mode);if(typeof data=="string"){data=new Uint8Array(intArrayFromString(data,true))}if(ArrayBuffer.isView(data)){FS.write(stream,data,0,data.byteLength,undefined,opts.canOwn)}else{abort("Unsupported data type")}FS.close(stream)},cwd:()=>FS.currentPath,chdir(path){var lookup=FS.lookupPath(path,{follow:true});if(lookup.node===null){throw new FS.ErrnoError(44)}if(!FS.isDir(lookup.node.mode)){throw new FS.ErrnoError(54)}var errCode=FS.nodePermissions(lookup.node,"x");if(errCode){throw new FS.ErrnoError(errCode)}FS.currentPath=lookup.path},createDefaultDirectories(){FS.mkdir("/tmp");FS.mkdir("/home");FS.mkdir("/home/web_user")},createDefaultDevices(){FS.mkdir("/dev");FS.registerDevice(FS.makedev(1,3),{read:()=>0,write:(stream,buffer,offset,length,pos)=>length,llseek:()=>0});FS.mkdev("/dev/null",FS.makedev(1,3));TTY.register(FS.makedev(5,0),TTY.default_tty_ops);TTY.register(FS.makedev(6,0),TTY.default_tty1_ops);FS.mkdev("/dev/tty",FS.makedev(5,0));FS.mkdev("/dev/tty1",FS.makedev(6,0));var randomBuffer=new Uint8Array(1024),randomLeft=0;var randomByte=()=>{if(randomLeft===0){randomFill(randomBuffer);randomLeft=randomBuffer.byteLength}return 
randomBuffer[--randomLeft]};FS.createDevice("/dev","random",randomByte);FS.createDevice("/dev","urandom",randomByte);FS.mkdir("/dev/shm");FS.mkdir("/dev/shm/tmp")},createSpecialDirectories(){FS.mkdir("/proc");var proc_self=FS.mkdir("/proc/self");FS.mkdir("/proc/self/fd");FS.mount({mount(){var node=FS.createNode(proc_self,"fd",16895,73);node.stream_ops={llseek:MEMFS.stream_ops.llseek};node.node_ops={lookup(parent,name){var fd=+name;var stream=FS.getStreamChecked(fd);var ret={parent:null,mount:{mountpoint:"fake"},node_ops:{readlink:()=>stream.path},id:fd+1};ret.parent=ret;return ret},readdir(){return Array.from(FS.streams.entries()).filter(([k,v])=>v).map(([k,v])=>k.toString())}};return node}},{},"/proc/self/fd")},createStandardStreams(input,output,error){if(input){FS.createDevice("/dev","stdin",input)}else{FS.symlink("/dev/tty","/dev/stdin")}if(output){FS.createDevice("/dev","stdout",null,output)}else{FS.symlink("/dev/tty","/dev/stdout")}if(error){FS.createDevice("/dev","stderr",null,error)}else{FS.symlink("/dev/tty1","/dev/stderr")}var stdin=FS.open("/dev/stdin",0);var stdout=FS.open("/dev/stdout",1);var stderr=FS.open("/dev/stderr",1)},staticInit(){FS.nameTable=new Array(4096);FS.mount(MEMFS,{},"/");FS.createDefaultDirectories();FS.createDefaultDevices();FS.createSpecialDirectories();FS.filesystems={MEMFS}},init(input,output,error){FS.initialized=true;input??=Module["stdin"];output??=Module["stdout"];error??=Module["stderr"];FS.createStandardStreams(input,output,error)},quit(){FS.initialized=false;for(var stream of FS.streams){if(stream){FS.close(stream)}}},findObject(path,dontResolveLastLink){var ret=FS.analyzePath(path,dontResolveLastLink);if(!ret.exists){return null}return ret.object},analyzePath(path,dontResolveLastLink){try{var lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});path=lookup.path}catch(e){}var ret={isRoot:false,exists:false,error:0,name:null,path:null,object:null,parentExists:false,parentPath:null,parentObject:null};try{var 
lookup=FS.lookupPath(path,{parent:true});ret.parentExists=true;ret.parentPath=lookup.path;ret.parentObject=lookup.node;ret.name=PATH.basename(path);lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});ret.exists=true;ret.path=lookup.path;ret.object=lookup.node;ret.name=lookup.node.name;ret.isRoot=lookup.path==="/"}catch(e){ret.error=e.errno}return ret},createPath(parent,path,canRead,canWrite){parent=typeof parent=="string"?parent:FS.getPath(parent);var parts=path.split("/").reverse();while(parts.length){var part=parts.pop();if(!part)continue;var current=PATH.join2(parent,part);try{FS.mkdir(current)}catch(e){if(e.errno!=20)throw e}parent=current}return current},createFile(parent,name,properties,canRead,canWrite){var path=PATH.join2(typeof parent=="string"?parent:FS.getPath(parent),name);var mode=FS_getMode(canRead,canWrite);return FS.create(path,mode)},createDataFile(parent,name,data,canRead,canWrite,canOwn){var path=name;if(parent){parent=typeof parent=="string"?parent:FS.getPath(parent);path=name?PATH.join2(parent,name):parent}var mode=FS_getMode(canRead,canWrite);var node=FS.create(path,mode);if(data){if(typeof data=="string"){var arr=new Array(data.length);for(var i=0,len=data.length;ithis.length-1||idx<0){return undefined}var chunkOffset=idx%this.chunkSize;var chunkNum=idx/this.chunkSize|0;return this.getter(chunkNum)[chunkOffset]}setDataGetter(getter){this.getter=getter}cacheLength(){var xhr=new XMLHttpRequest;xhr.open("HEAD",url,false);xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". 
Status: "+xhr.status);var datalength=Number(xhr.getResponseHeader("Content-length"));var header;var hasByteServing=(header=xhr.getResponseHeader("Accept-Ranges"))&&header==="bytes";var usesGzip=(header=xhr.getResponseHeader("Content-Encoding"))&&header==="gzip";var chunkSize=1024*1024;if(!hasByteServing)chunkSize=datalength;var doXHR=(from,to)=>{if(from>to)abort("invalid range ("+from+", "+to+") or no bytes requested!");if(to>datalength-1)abort("only "+datalength+" bytes available! programmer error!");var xhr=new XMLHttpRequest;xhr.open("GET",url,false);if(datalength!==chunkSize)xhr.setRequestHeader("Range","bytes="+from+"-"+to);xhr.responseType="arraybuffer";if(xhr.overrideMimeType){xhr.overrideMimeType("text/plain; charset=x-user-defined")}xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". Status: "+xhr.status);if(xhr.response!==undefined){return new Uint8Array(xhr.response||[])}return intArrayFromString(xhr.responseText||"",true)};var lazyArray=this;lazyArray.setDataGetter(chunkNum=>{var start=chunkNum*chunkSize;var end=(chunkNum+1)*chunkSize-1;end=Math.min(end,datalength-1);if(typeof lazyArray.chunks[chunkNum]=="undefined"){lazyArray.chunks[chunkNum]=doXHR(start,end)}if(typeof lazyArray.chunks[chunkNum]=="undefined")abort("doXHR failed!");return lazyArray.chunks[chunkNum]});if(usesGzip||!datalength){chunkSize=datalength=1;datalength=this.getter(0).length;chunkSize=datalength;out("LazyFiles on gzip forces download of the whole file when length is accessed")}this._length=datalength;this._chunkSize=chunkSize;this.lengthKnown=true}get length(){if(!this.lengthKnown){this.cacheLength()}return this._length}get chunkSize(){if(!this.lengthKnown){this.cacheLength()}return this._chunkSize}}if(globalThis.XMLHttpRequest){if(!ENVIRONMENT_IS_WORKER)abort("Cannot do synchronous binary XHRs outside webworkers in modern browsers. 
Use --embed-file or --preload-file in emcc");var lazyArray=new LazyUint8Array;var properties={isDevice:false,contents:lazyArray}}else{var properties={isDevice:false,url}}var node=FS.createFile(parent,name,properties,canRead,canWrite);if(properties.contents){node.contents=properties.contents}else if(properties.url){node.contents=null;node.url=properties.url}Object.defineProperties(node,{usedBytes:{get:function(){return this.contents.length}}});var stream_ops={};for(const[key,fn]of Object.entries(node.stream_ops)){stream_ops[key]=(...args)=>{FS.forceLoadFile(node);return fn(...args)}}function writeChunks(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=contents.length)return 0;var size=Math.min(contents.length-position,length);if(contents.slice){for(var i=0;i{FS.forceLoadFile(node);return writeChunks(stream,buffer,offset,length,position)};stream_ops.mmap=(stream,length,position,prot,flags)=>{FS.forceLoadFile(node);var ptr=mmapAlloc(length);if(!ptr){throw new FS.ErrnoError(48)}writeChunks(stream,HEAP8,ptr,length,position);return{ptr,allocated:true}};node.stream_ops=stream_ops;return node}};var SYSCALLS={DEFAULT_POLLMASK:5,calculateAt(dirfd,path,allowEmpty){if(PATH.isAbs(path)){return path}var dir;if(dirfd===-100){dir=FS.cwd()}else{var dirstream=SYSCALLS.getStreamFromFD(dirfd);dir=dirstream.path}if(path.length==0){if(!allowEmpty){throw new FS.ErrnoError(44)}return dir}return dir+"/"+path},writeStat(buf,stat){HEAPU32[buf>>2]=stat.dev;HEAPU32[buf+4>>2]=stat.mode;HEAPU32[buf+8>>2]=stat.nlink;HEAPU32[buf+12>>2]=stat.uid;HEAPU32[buf+16>>2]=stat.gid;HEAPU32[buf+20>>2]=stat.rdev;HEAP64[buf+24>>3]=BigInt(stat.size);HEAP32[buf+32>>2]=4096;HEAP32[buf+36>>2]=stat.blocks;var atime=stat.atime.getTime();var mtime=stat.mtime.getTime();var 
ctime=stat.ctime.getTime();HEAP64[buf+40>>3]=BigInt(Math.floor(atime/1e3));HEAPU32[buf+48>>2]=atime%1e3*1e3*1e3;HEAP64[buf+56>>3]=BigInt(Math.floor(mtime/1e3));HEAPU32[buf+64>>2]=mtime%1e3*1e3*1e3;HEAP64[buf+72>>3]=BigInt(Math.floor(ctime/1e3));HEAPU32[buf+80>>2]=ctime%1e3*1e3*1e3;HEAP64[buf+88>>3]=BigInt(stat.ino);return 0},writeStatFs(buf,stats){HEAPU32[buf+4>>2]=stats.bsize;HEAPU32[buf+60>>2]=stats.bsize;HEAP64[buf+8>>3]=BigInt(stats.blocks);HEAP64[buf+16>>3]=BigInt(stats.bfree);HEAP64[buf+24>>3]=BigInt(stats.bavail);HEAP64[buf+32>>3]=BigInt(stats.files);HEAP64[buf+40>>3]=BigInt(stats.ffree);HEAPU32[buf+48>>2]=stats.fsid;HEAPU32[buf+64>>2]=stats.flags;HEAPU32[buf+56>>2]=stats.namelen},doMsync(addr,stream,len,flags,offset){if(!FS.isFile(stream.node.mode)){throw new FS.ErrnoError(43)}if(flags&2){return 0}var buffer=HEAPU8.slice(addr,addr+len);FS.msync(stream,buffer,offset,len,flags)},getStreamFromFD(fd){var stream=FS.getStreamChecked(fd);return stream},varargs:undefined,getStr(ptr){var ret=UTF8ToString(ptr);return ret}};function ___syscall_fcntl64(fd,cmd,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(cmd){case 0:{var arg=syscallGetVarargI();if(arg<0){return-28}while(FS.streams[arg]){arg++}var newStream;newStream=FS.dupStream(stream,arg);return newStream.fd}case 1:case 2:return 0;case 3:return stream.flags;case 4:{var arg=syscallGetVarargI();stream.flags|=arg;return 0}case 12:{var arg=syscallGetVarargP();var offset=0;HEAP16[arg+offset>>1]=2;return 0}case 13:case 14:return 0}return-28}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_fstat64(fd,buf){try{return SYSCALLS.writeStat(buf,FS.fstat(fd))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_ioctl(fd,op,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(op){case 21509:{if(!stream.tty)return-59;return 0}case 
21505:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcgets){var termios=stream.tty.ops.ioctl_tcgets(stream);var argp=syscallGetVarargP();HEAP32[argp>>2]=termios.c_iflag||0;HEAP32[argp+4>>2]=termios.c_oflag||0;HEAP32[argp+8>>2]=termios.c_cflag||0;HEAP32[argp+12>>2]=termios.c_lflag||0;for(var i=0;i<32;i++){HEAP8[argp+i+17]=termios.c_cc[i]||0}return 0}return 0}case 21510:case 21511:case 21512:{if(!stream.tty)return-59;return 0}case 21506:case 21507:case 21508:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcsets){var argp=syscallGetVarargP();var c_iflag=HEAP32[argp>>2];var c_oflag=HEAP32[argp+4>>2];var c_cflag=HEAP32[argp+8>>2];var c_lflag=HEAP32[argp+12>>2];var c_cc=[];for(var i=0;i<32;i++){c_cc.push(HEAP8[argp+i+17])}return stream.tty.ops.ioctl_tcsets(stream.tty,op,{c_iflag,c_oflag,c_cflag,c_lflag,c_cc})}return 0}case 21519:{if(!stream.tty)return-59;var argp=syscallGetVarargP();HEAP32[argp>>2]=0;return 0}case 21520:{if(!stream.tty)return-59;return-28}case 21537:case 21531:{var argp=syscallGetVarargP();return FS.ioctl(stream,op,argp)}case 21523:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tiocgwinsz){var winsize=stream.tty.ops.ioctl_tiocgwinsz(stream.tty);var argp=syscallGetVarargP();HEAP16[argp>>1]=winsize[0];HEAP16[argp+2>>1]=winsize[1]}return 0}case 21524:{if(!stream.tty)return-59;return 0}case 21515:{if(!stream.tty)return-59;return 0}default:return-28}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_lstat64(path,buf){try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.lstat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_newfstatat(dirfd,path,buf,flags){try{path=SYSCALLS.getStr(path);var nofollow=flags&256;var allowEmpty=flags&4096;flags=flags&~6400;path=SYSCALLS.calculateAt(dirfd,path,allowEmpty);return SYSCALLS.writeStat(buf,nofollow?FS.lstat(path):FS.stat(path))}catch(e){if(typeof 
FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_openat(dirfd,path,flags,varargs){SYSCALLS.varargs=varargs;try{path=SYSCALLS.getStr(path);path=SYSCALLS.calculateAt(dirfd,path);var mode=varargs?syscallGetVarargI():0;return FS.open(path,flags,mode).fd}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_stat64(path,buf){try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __abort_js=()=>abort("");var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function __munmap_js(addr,len,prot,flags,fd,offset){offset=bigintToI53Checked(offset);try{var stream=SYSCALLS.getStreamFromFD(fd);if(prot&2){SYSCALLS.doMsync(addr,stream,len,flags,offset)}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite);var __tzset_js=(timezone,daylight,std_name,dst_name)=>{var currentYear=(new Date).getFullYear();var winter=new Date(currentYear,0,1);var summer=new Date(currentYear,6,1);var winterOffset=winter.getTimezoneOffset();var summerOffset=summer.getTimezoneOffset();var stdTimezoneOffset=Math.max(winterOffset,summerOffset);HEAPU32[timezone>>2]=stdTimezoneOffset*60;HEAP32[daylight>>2]=Number(winterOffset!=summerOffset);var extractZone=timezoneOffset=>{var sign=timezoneOffset>=0?"-":"+";var absOffset=Math.abs(timezoneOffset);var hours=String(Math.floor(absOffset/60)).padStart(2,"0");var minutes=String(absOffset%60).padStart(2,"0");return`UTC${sign}${hours}${minutes}`};var winterName=extractZone(winterOffset);var summerName=extractZone(summerOffset);if(summerOffset2147483648;var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var 
pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=HEAPU8.length;requestedSize>>>=0;var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};var _emscripten_throw_string=str=>{throw UTF8ToString(str)};var ENV={};var getExecutableName=()=>thisProgram||"./this.program";var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(globalThis.navigator?.language??"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};var _environ_get=(__environ,environ_buf)=>{var bufSize=0;var envp=0;for(var string of getEnvStrings()){var ptr=environ_buf+bufSize;HEAPU32[__environ+envp>>2]=ptr;bufSize+=stringToUTF8(string,ptr,Infinity)+1;envp+=4}return 0};var _environ_sizes_get=(penviron_count,penviron_buf_size)=>{var strings=getEnvStrings();HEAPU32[penviron_count>>2]=strings.length;var bufSize=0;for(var string of strings){bufSize+=lengthBytesUTF8(string)+1}HEAPU32[penviron_buf_size>>2]=bufSize;return 0};function _fd_close(fd){try{var stream=SYSCALLS.getStreamFromFD(fd);FS.close(stream);return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doReadv=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;var 
curr=FS.read(stream,HEAP8,ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}function _fd_seek(fd,offset,whence,newOffset){offset=bigintToI53Checked(offset);try{if(isNaN(offset))return 61;var stream=SYSCALLS.getStreamFromFD(fd);FS.llseek(stream,offset,whence);HEAP64[newOffset>>3]=BigInt(stream.position);if(stream.getdents&&offset===0&&whence===0)stream.getdents=null;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doWritev=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;var curr=FS.write(stream,HEAP8,ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var _llvm_eh_typeid_for=type=>type;var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var getCFunc=ident=>{var func=Module["_"+ident];return func};var writeArrayToMemory=(array,buffer)=>{HEAP8.set(array,buffer)};var stackAlloc=sz=>__emscripten_stack_alloc(sz);var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={string:str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},array:arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return UTF8ToString(ret)}if(returnType==="boolean")return Boolean(ret);return ret}var func=getCFunc(ident);var cArgs=[];var stack=0;if(args){for(var i=0;i{var numericArgs=!argTypes||argTypes.every(type=>type==="number"||type==="boolean");var numericRet=returnType!=="string";if(numericRet&&numericArgs&&!opts){return 
getCFunc(ident)}return(...args)=>ccall(ident,returnType,argTypes,args,opts)};var FS_createPath=(...args)=>FS.createPath(...args);var FS_unlink=(...args)=>FS.unlink(...args);var FS_createLazyFile=(...args)=>FS.createLazyFile(...args);var FS_createDevice=(...args)=>FS.createDevice(...args);FS.createPreloadedFile=FS_createPreloadedFile;FS.preloadFile=FS_preloadFile;FS.staticInit();{if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["preloadPlugins"])preloadPlugins=Module["preloadPlugins"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}Module["addRunDependency"]=addRunDependency;Module["removeRunDependency"]=removeRunDependency;Module["ccall"]=ccall;Module["cwrap"]=cwrap;Module["FS_preloadFile"]=FS_preloadFile;Module["FS_unlink"]=FS_unlink;Module["FS_createPath"]=FS_createPath;Module["FS_createDevice"]=FS_createDevice;Module["FS"]=FS;Module["FS_createDataFile"]=FS_createDataFile;Module["FS_createLazyFile"]=FS_createLazyFile;var ___cxa_free_exception,_opencc_create,_opencc_convert,_opencc_destroy,_emscripten_builtin_memalign,_setThrew,__emscripten_tempret_set,__emscripten_stack_restore,__emscripten_stack_alloc,_emscripten_stack_get_current,___cxa_decrement_exception_refcount,___cxa_increment_exception_refcount,___cxa_can_catch,___cxa_get_exception_ptr,memory,__indirect_function_table,wasmMemory,wasmTable;function 
assignWasmExports(wasmExports){___cxa_free_exception=wasmExports["__cxa_free_exception"];_opencc_create=Module["_opencc_create"]=wasmExports["opencc_create"];_opencc_convert=Module["_opencc_convert"]=wasmExports["opencc_convert"];_opencc_destroy=Module["_opencc_destroy"]=wasmExports["opencc_destroy"];_emscripten_builtin_memalign=wasmExports["emscripten_builtin_memalign"];_setThrew=wasmExports["setThrew"];__emscripten_tempret_set=wasmExports["_emscripten_tempret_set"];__emscripten_stack_restore=wasmExports["_emscripten_stack_restore"];__emscripten_stack_alloc=wasmExports["_emscripten_stack_alloc"];_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"];___cxa_decrement_exception_refcount=wasmExports["__cxa_decrement_exception_refcount"];___cxa_increment_exception_refcount=wasmExports["__cxa_increment_exception_refcount"];___cxa_can_catch=wasmExports["__cxa_can_catch"];___cxa_get_exception_ptr=wasmExports["__cxa_get_exception_ptr"];memory=wasmMemory=wasmExports["memory"];__indirect_function_table=wasmTable=wasmExports["__indirect_function_table"]}var 
wasmImports={__assert_fail:___assert_fail,__cxa_begin_catch:___cxa_begin_catch,__cxa_end_catch:___cxa_end_catch,__cxa_find_matching_catch_2:___cxa_find_matching_catch_2,__cxa_find_matching_catch_3:___cxa_find_matching_catch_3,__cxa_rethrow:___cxa_rethrow,__cxa_throw:___cxa_throw,__cxa_uncaught_exceptions:___cxa_uncaught_exceptions,__resumeException:___resumeException,__syscall_fcntl64:___syscall_fcntl64,__syscall_fstat64:___syscall_fstat64,__syscall_ioctl:___syscall_ioctl,__syscall_lstat64:___syscall_lstat64,__syscall_newfstatat:___syscall_newfstatat,__syscall_openat:___syscall_openat,__syscall_stat64:___syscall_stat64,_abort_js:__abort_js,_munmap_js:__munmap_js,_tzset_js:__tzset_js,emscripten_resize_heap:_emscripten_resize_heap,emscripten_throw_string:_emscripten_throw_string,environ_get:_environ_get,environ_sizes_get:_environ_sizes_get,fd_close:_fd_close,fd_read:_fd_read,fd_seek:_fd_seek,fd_write:_fd_write,invoke_diii,invoke_fiii,invoke_i,invoke_ii,invoke_iii,invoke_iiii,invoke_iiiii,invoke_iiiiii,invoke_iiiiiii,invoke_iiiiiiii,invoke_iiiiiiiiiii,invoke_iiiiiiiiiiii,invoke_iiiiiiiiiiiii,invoke_jiiii,invoke_v,invoke_vi,invoke_vii,invoke_viii,invoke_viiii,invoke_viiiii,invoke_viiiiii,invoke_viiiiiii,invoke_viiiiiiiiii,invoke_viiiiiiiiiiiiiii,llvm_eh_typeid_for:_llvm_eh_typeid_for};function invoke_viiiiiii(index,a1,a2,a3,a4,a5,a6,a7){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_ii(index,a1){var sp=stackSave();try{return getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiii(index,a1,a2,a3,a4){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vii(index,a1,a2){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iii(index,a1,a2){var 
sp=stackSave();try{return getWasmTableEntry(index)(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viii(index,a1,a2,a3){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_v(index){var sp=stackSave();try{getWasmTableEntry(index)()}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiii(index,a1,a2,a3){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiii(index,a1,a2,a3,a4,a5){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiii(index,a1,a2,a3,a4){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vi(index,a1){var sp=stackSave();try{getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiii(index,a1,a2,a3,a4,a5,a6){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiiii(index,a1,a2,a3,a4,a5,a6){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiii(index,a1,a2,a3,a4,a5){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiiii(index,a1,a2,a3,a4,a5,a6,a7){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)}catch(e){stackRestore(sp);if(e!==e+0)throw 
e;_setThrew(1,0)}}function invoke_jiiii(index,a1,a2,a3,a4){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0);return 0n}}function invoke_iiiiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_fiii(index,a1,a2,a3){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_diii(index,a1,a2,a3){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_i(index){var sp=stackSave();try{return getWasmTableEntry(index)()}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiiiiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function run(){if(runDependencies>0){dependenciesFulfilled=run;return}preRun();if(runDependencies>0){dependenciesFulfilled=run;return}function 
doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;wasmExports=await (createWasm());run();if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} +;return moduleRtn}if(typeof exports==="object"&&typeof module==="object"){module.exports=createOpenCCWasm;module.exports.default=createOpenCCWasm}else if(typeof define==="function"&&define["amd"])define([],()=>createOpenCCWasm); diff --git a/wasm-lib/dist/data/config/hk2s.json b/wasm-lib/dist/data/config/hk2s.json new file mode 100644 index 000000000..cf0e9b975 --- /dev/null +++ b/wasm-lib/dist/data/config/hk2s.json @@ -0,0 +1,33 @@ +{ + "name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "HKVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "HKVariantsRev.ocd2" + }] + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/hk2t.json b/wasm-lib/dist/data/config/hk2t.json new file mode 100644 index 000000000..0d47b9174 --- /dev/null +++ b/wasm-lib/dist/data/config/hk2t.json @@ -0,0 +1,22 @@ +{ + "name": "Traditional Chinese (Hong Kong variant) to Traditional Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "HKVariantsRevPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "HKVariantsRevPhrases.ocd2" + }, { + 
"type": "ocd2", + "file": "HKVariantsRev.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/jp2t.json b/wasm-lib/dist/data/config/jp2t.json new file mode 100644 index 000000000..025d89197 --- /dev/null +++ b/wasm-lib/dist/data/config/jp2t.json @@ -0,0 +1,25 @@ +{ + "name": "New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "JPShinjitaiPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "JPShinjitaiPhrases.ocd2" + }, { + "type": "ocd2", + "file": "JPShinjitaiCharacters.ocd2" + }, { + "type": "ocd2", + "file": "JPVariantsRev.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/s2hk.json b/wasm-lib/dist/data/config/s2hk.json new file mode 100644 index 000000000..fcaa017ee --- /dev/null +++ b/wasm-lib/dist/data/config/s2hk.json @@ -0,0 +1,27 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }, { + "dict": { + "type": "ocd2", + "file": "HKVariants.ocd2" + } + }] +} diff --git a/wasm-lib/dist/data/config/s2t.json b/wasm-lib/dist/data/config/s2t.json new file mode 100644 index 000000000..87516acbd --- /dev/null +++ b/wasm-lib/dist/data/config/s2t.json @@ -0,0 +1,22 @@ +{ + "name": "Simplified Chinese to Traditional Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/s2tw.json 
b/wasm-lib/dist/data/config/s2tw.json new file mode 100644 index 000000000..2a3d7656b --- /dev/null +++ b/wasm-lib/dist/data/config/s2tw.json @@ -0,0 +1,27 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Taiwan standard)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }, { + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }] +} diff --git a/wasm-lib/dist/data/config/s2twp.json b/wasm-lib/dist/data/config/s2twp.json new file mode 100644 index 000000000..2f36e9352 --- /dev/null +++ b/wasm-lib/dist/data/config/s2twp.json @@ -0,0 +1,32 @@ +{ + "name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "STPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "STPhrases.ocd2" + }, { + "type": "ocd2", + "file": "STCharacters.ocd2" + }] + } + }, { + "dict": { + "type": "ocd2", + "file": "TWPhrases.ocd2" + } + }, { + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }] +} diff --git a/wasm-lib/dist/data/config/t2hk.json b/wasm-lib/dist/data/config/t2hk.json new file mode 100644 index 000000000..519d4a3fd --- /dev/null +++ b/wasm-lib/dist/data/config/t2hk.json @@ -0,0 +1,16 @@ +{ + "name": "Traditional Chinese to Traditional Chinese (Hong Kong variant)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "HKVariants.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "ocd2", + "file": "HKVariants.ocd2" + } + }] +} diff --git a/wasm-lib/dist/data/config/t2jp.json b/wasm-lib/dist/data/config/t2jp.json new file mode 100644 index 000000000..7a43217ff --- /dev/null +++ 
b/wasm-lib/dist/data/config/t2jp.json @@ -0,0 +1,16 @@ +{ + "name": "Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "JPVariants.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "ocd2", + "file": "JPVariants.ocd2" + } + }] +} diff --git a/wasm-lib/dist/data/config/t2s.json b/wasm-lib/dist/data/config/t2s.json new file mode 100644 index 000000000..06cf5f58e --- /dev/null +++ b/wasm-lib/dist/data/config/t2s.json @@ -0,0 +1,22 @@ +{ + "name": "Traditional Chinese to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/t2tw.json b/wasm-lib/dist/data/config/t2tw.json new file mode 100644 index 000000000..0394f600d --- /dev/null +++ b/wasm-lib/dist/data/config/t2tw.json @@ -0,0 +1,16 @@ +{ + "name": "Traditional Chinese to Traditional Chinese (Taiwan standard)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "ocd2", + "file": "TWVariants.ocd2" + } + }] +} diff --git a/wasm-lib/dist/data/config/tw2s.json b/wasm-lib/dist/data/config/tw2s.json new file mode 100644 index 000000000..4f554393e --- /dev/null +++ b/wasm-lib/dist/data/config/tw2s.json @@ -0,0 +1,33 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRev.ocd2" + }] + } + }, { + "dict": { + "type": 
"group", + "dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/tw2sp.json b/wasm-lib/dist/data/config/tw2sp.json new file mode 100644 index 000000000..64eb9d977 --- /dev/null +++ b/wasm-lib/dist/data/config/tw2sp.json @@ -0,0 +1,36 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases)", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TSPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TWPhrasesRev.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRev.ocd2" + }] + } + }, { + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TSPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TSCharacters.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/config/tw2t.json b/wasm-lib/dist/data/config/tw2t.json new file mode 100644 index 000000000..ad5295b65 --- /dev/null +++ b/wasm-lib/dist/data/config/tw2t.json @@ -0,0 +1,22 @@ +{ + "name": "Traditional Chinese (Taiwan standard) to Traditional Chinese", + "segmentation": { + "type": "mmseg", + "dict": { + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + } + }, + "conversion_chain": [{ + "dict": { + "type": "group", + "dicts": [{ + "type": "ocd2", + "file": "TWVariantsRevPhrases.ocd2" + }, { + "type": "ocd2", + "file": "TWVariantsRev.ocd2" + }] + } + }] +} diff --git a/wasm-lib/dist/data/dict/HKVariants.ocd2 b/wasm-lib/dist/data/dict/HKVariants.ocd2 new file mode 100644 index 000000000..fa2edde1c Binary files /dev/null and b/wasm-lib/dist/data/dict/HKVariants.ocd2 differ diff --git a/wasm-lib/dist/data/dict/HKVariantsRev.ocd2 b/wasm-lib/dist/data/dict/HKVariantsRev.ocd2 new file mode 100644 index 000000000..abc97a940 Binary files /dev/null and 
b/wasm-lib/dist/data/dict/HKVariantsRev.ocd2 differ diff --git a/wasm-lib/dist/data/dict/HKVariantsRevPhrases.ocd2 b/wasm-lib/dist/data/dict/HKVariantsRevPhrases.ocd2 new file mode 100644 index 000000000..848227724 Binary files /dev/null and b/wasm-lib/dist/data/dict/HKVariantsRevPhrases.ocd2 differ diff --git a/wasm-lib/dist/data/dict/JPShinjitaiCharacters.ocd2 b/wasm-lib/dist/data/dict/JPShinjitaiCharacters.ocd2 new file mode 100644 index 000000000..d27801472 Binary files /dev/null and b/wasm-lib/dist/data/dict/JPShinjitaiCharacters.ocd2 differ diff --git a/wasm-lib/dist/data/dict/JPShinjitaiPhrases.ocd2 b/wasm-lib/dist/data/dict/JPShinjitaiPhrases.ocd2 new file mode 100644 index 000000000..e4c823f11 Binary files /dev/null and b/wasm-lib/dist/data/dict/JPShinjitaiPhrases.ocd2 differ diff --git a/wasm-lib/dist/data/dict/JPVariants.ocd2 b/wasm-lib/dist/data/dict/JPVariants.ocd2 new file mode 100644 index 000000000..132be9a0e Binary files /dev/null and b/wasm-lib/dist/data/dict/JPVariants.ocd2 differ diff --git a/wasm-lib/dist/data/dict/JPVariantsRev.ocd2 b/wasm-lib/dist/data/dict/JPVariantsRev.ocd2 new file mode 100644 index 000000000..9c9f87e28 Binary files /dev/null and b/wasm-lib/dist/data/dict/JPVariantsRev.ocd2 differ diff --git a/wasm-lib/dist/data/dict/STCharacters.ocd2 b/wasm-lib/dist/data/dict/STCharacters.ocd2 new file mode 100644 index 000000000..5f355eb25 Binary files /dev/null and b/wasm-lib/dist/data/dict/STCharacters.ocd2 differ diff --git a/wasm-lib/dist/data/dict/STPhrases.ocd2 b/wasm-lib/dist/data/dict/STPhrases.ocd2 new file mode 100644 index 000000000..811eff0cc Binary files /dev/null and b/wasm-lib/dist/data/dict/STPhrases.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TSCharacters.ocd2 b/wasm-lib/dist/data/dict/TSCharacters.ocd2 new file mode 100644 index 000000000..3c0d60a96 Binary files /dev/null and b/wasm-lib/dist/data/dict/TSCharacters.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TSPhrases.ocd2 
b/wasm-lib/dist/data/dict/TSPhrases.ocd2 new file mode 100644 index 000000000..0007aa199 Binary files /dev/null and b/wasm-lib/dist/data/dict/TSPhrases.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TWPhrases.ocd2 b/wasm-lib/dist/data/dict/TWPhrases.ocd2 new file mode 100644 index 000000000..5f9014dbb Binary files /dev/null and b/wasm-lib/dist/data/dict/TWPhrases.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TWPhrasesRev.ocd2 b/wasm-lib/dist/data/dict/TWPhrasesRev.ocd2 new file mode 100644 index 000000000..183e7c442 Binary files /dev/null and b/wasm-lib/dist/data/dict/TWPhrasesRev.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TWVariants.ocd2 b/wasm-lib/dist/data/dict/TWVariants.ocd2 new file mode 100644 index 000000000..8d7439558 Binary files /dev/null and b/wasm-lib/dist/data/dict/TWVariants.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TWVariantsRev.ocd2 b/wasm-lib/dist/data/dict/TWVariantsRev.ocd2 new file mode 100644 index 000000000..2546811c4 Binary files /dev/null and b/wasm-lib/dist/data/dict/TWVariantsRev.ocd2 differ diff --git a/wasm-lib/dist/data/dict/TWVariantsRevPhrases.ocd2 b/wasm-lib/dist/data/dict/TWVariantsRevPhrases.ocd2 new file mode 100644 index 000000000..95f26afcc Binary files /dev/null and b/wasm-lib/dist/data/dict/TWVariantsRevPhrases.ocd2 differ diff --git a/wasm-lib/dist/esm/index.js b/wasm-lib/dist/esm/index.js new file mode 100644 index 000000000..39ab4a479 --- /dev/null +++ b/wasm-lib/dist/esm/index.js @@ -0,0 +1,219 @@ +// Lightweight OpenCC WASM wrapper with opencc-js compatible API. 
// Resource locations used below, all resolved relative to this module's URL:
//   - ./opencc-wasm.js        Emscripten glue, imported lazily by getModule()
//   - ../<file>               any file the glue requests through locateFile()
//   - ../data/config/*.json   conversion configs
//   - ../data/dict/*.ocd2     compiled dictionaries

// Optional Node helpers (guarded so the module still loads in a browser).
let fsMod = null;
let fileURLToPathFn = null;
const hasNode = typeof process !== "undefined" && !!process.versions?.node;
if (hasNode) {
  fsMod = await import("node:fs");
  ({ fileURLToPath: fileURLToPathFn } = await import("node:url"));
}

// Parent directory of the directory that contains this module.
const BASE_URL = new URL("../", import.meta.url);

/**
 * Convert a file: URL into a local filesystem path.
 * @throws {Error} when the Node fs/url helpers were not loaded.
 */
function toFsPath(url) {
  if (!fsMod || !fileURLToPathFn) throw new Error("fs not available in this environment");
  return fileURLToPathFn(url);
}

/** Synchronously read a file: URL as UTF-8 text (Node only). */
const readFileText = (url) => {
  const path = toFsPath(url);
  return fsMod.readFileSync(path, "utf-8");
};

/** Synchronously read a file: URL as a Buffer (Node only). */
const readFileBuffer = (url) => {
  const path = toFsPath(url);
  return fsMod.readFileSync(path);
};

// Preset mapping: from-locale -> to-locale -> config file name.
// A null entry means source and target are the same locale (no-op).
const CONFIG_MAP = {
  cn: { t: "s2t.json", tw: "s2tw.json", hk: "s2hk.json", cn: null },
  tw: { cn: "tw2s.json", t: "tw2t.json", tw: null },
  hk: { cn: "hk2s.json", t: "hk2t.json", hk: null },
  t: { cn: "t2s.json", tw: "t2tw.json", hk: "t2hk.json", jp: "t2jp.json", t: null },
  jp: { t: "jp2t.json" },
};

// Caches. Promises (not resolved values) are stored so that concurrent callers
// share one in-flight load instead of each starting their own.
const dictLoads = new Map(); // dict file name -> Promise<void> (written into the module FS)
const handles = new Map(); // config name -> Promise<number> (handle from opencc_create)
let modulePromise = null;
let api = null;

const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

/**
 * Lazily import the glue script and instantiate the WASM module exactly once.
 *
 * The promise is memoised synchronously, before any await, so overlapping
 * first calls cannot create two module instances. A failed attempt clears the
 * cache so that a later call can retry.
 *
 * @returns {Promise<object>} the instantiated module.
 */
function getModule() {
  if (modulePromise) return modulePromise;

  const pending = (async () => {
    const glueUrl = new URL("./opencc-wasm.js", import.meta.url);
    const { default: create } = await import(glueUrl.href);
    return create({
      // Files requested by the glue are resolved one directory above this module.
      locateFile: (p) => new URL(`../${p}`, import.meta.url).href,
    });
  })();

  modulePromise = pending;
  pending.catch(() => {
    if (modulePromise === pending) modulePromise = null;
  });
  return pending;
}

/**
 * Resolve the module and the cwrap'd C entry points (wrapped once, then cached).
 * @returns {Promise<{mod: object, api: {create: Function, convert: Function, destroy: Function}}>}
 */
async function getApi() {
  const mod = await getModule();
  if (!api) {
    api = {
      create: mod.cwrap("opencc_create", "number", ["string"]),
      convert: mod.cwrap("opencc_convert", "string", ["number", "string"]),
      destroy: mod.cwrap("opencc_destroy", null, ["number"]),
    };
  }
  return { mod, api };
}

/**
 * Depth-first walk of a config "dict" node: calls visit(node) for every
 * `ocd2` node that has a `file`, recursing into `group` nodes.
 */
function forEachOcd2(node, visit) {
  if (!node || typeof node !== "object") return;
  if (node.type === "ocd2" && node.file) visit(node);
  if (node.type === "group" && Array.isArray(node.dicts)) {
    node.dicts.forEach((child) => forEachOcd2(child, visit));
  }
}

/** Walk the segmentation dict and every conversion_chain dict of a parsed config. */
function forEachConfigDict(cfg, visit) {
  forEachOcd2(cfg.segmentation?.dict, visit);
  if (Array.isArray(cfg.conversion_chain)) {
    cfg.conversion_chain.forEach((step) => forEachOcd2(step?.dict, visit));
  }
}

/** Add the file name of every ocd2 dictionary reachable from `node` to the Set `acc`. */
function collectOcd2Files(node, acc) {
  forEachOcd2(node, (dict) => acc.add(dict.file));
}

/** fetch() a URL and reject on a non-2xx status. */
async function fetchOk(url) {
  const resp = await fetch(url.href);
  if (!resp.ok) throw new Error(`Fetch ${url} failed: ${resp.status}`);
  return resp;
}

/** Load text from a URL; file: URLs are read through Node's fs, others through fetch(). */
async function fetchText(url) {
  if (url.protocol === "file:") {
    return readFileText(url);
  }
  return (await fetchOk(url)).text();
}

/** Load bytes from a URL as a Uint8Array; same transport rules as fetchText(). */
async function fetchBuffer(url) {
  if (url.protocol === "file:") {
    return new Uint8Array(readFileBuffer(url));
  }
  return new Uint8Array(await (await fetchOk(url)).arrayBuffer());
}

/**
 * Copy one dictionary into the module FS under /data/dict, at most once.
 * A failed load is forgotten so that it can be retried.
 */
function loadDict(mod, file) {
  const cached = dictLoads.get(file);
  if (cached) return cached;

  const pending = (async () => {
    const buf = await fetchBuffer(new URL(`./data/dict/${file}`, BASE_URL));
    mod.FS.writeFile(`/data/dict/${file}`, buf);
  })();

  dictLoads.set(file, pending);
  pending.catch(() => {
    if (dictLoads.get(file) === pending) dictLoads.delete(file);
  });
  return pending;
}

/**
 * Load a config plus its dictionaries into the module FS and open it.
 * @returns {Promise<number>} the handle returned by opencc_create.
 * @throws {Error} when a resource cannot be loaded or opencc_create fails.
 */
async function openConfig(configName) {
  const { mod, api: apiFns } = await getApi();
  mod.FS.mkdirTree("/data/config");
  mod.FS.mkdirTree("/data/dict");

  const cfgUrl = new URL(`./data/config/${configName}`, BASE_URL);
  const cfgJson = JSON.parse(await fetchText(cfgUrl));
  if (!cfgJson || typeof cfgJson !== "object") {
    throw new Error(`Invalid config ${configName}`);
  }

  const files = new Set();
  forEachConfigDict(cfgJson, (dict) => files.add(dict.file));
  for (const file of files) {
    await loadDict(mod, file);
  }

  // Rewrite dictionary references to the absolute paths they were written to.
  forEachConfigDict(cfgJson, (dict) => {
    dict.file = `/data/dict/${dict.file}`;
  });
  mod.FS.writeFile(`/data/config/${configName}`, JSON.stringify(cfgJson));

  const handle = apiFns.create(`/data/config/${configName}`);
  if (!handle || handle < 0) {
    throw new Error(`opencc_create failed for ${configName}`);
  }
  return handle;
}

/**
 * Return the handle for `configName`, opening the config on first use.
 * Overlapping calls for the same config share one load and one handle;
 * a failed load is dropped from the cache so that it can be retried.
 * @returns {Promise<number>}
 */
function ensureConfig(configName) {
  const cached = handles.get(configName);
  if (cached) return cached;

  const pending = openConfig(configName);
  handles.set(configName, pending);
  pending.catch(() => {
    if (handles.get(configName) === pending) handles.delete(configName);
  });
  return pending;
}

/**
 * Map a (from, to) locale pair to a config file name via CONFIG_MAP.
 * Matching is case-insensitive and only considers own keys, so inherited
 * property names such as "constructor" are rejected.
 * @returns {string|null} config file name, or null for a same-locale no-op.
 * @throws {Error} when the pair is not listed in CONFIG_MAP.
 */
function resolveConfig(from, to) {
  const f = String(from || "").toLowerCase();
  const t = String(to || "").toLowerCase();
  if (!hasOwn(CONFIG_MAP, f) || !hasOwn(CONFIG_MAP[f], t)) {
    throw new Error(`Unsupported conversion from '${from}' to '${to}'`);
  }
  return CONFIG_MAP[f][t]; // may be null for identical locale (no-op)
}

/**
 * Build an async converter. An explicit `config` file name takes precedence
 * over the `from`/`to` lookup. Nothing is loaded until the first conversion.
 * @returns {(text: string) => Promise<string>}
 */
function createConverter({ from, to, config } = {}) {
  const configName = config || resolveConfig(from, to);
  return async (text) => {
    if (configName === null) return text; // no-op
    const handle = await ensureConfig(configName);
    const { api: apiFns } = await getApi();
    return apiFns.convert(handle, text);
  };
}

/**
 * Build a synchronous converter from user-supplied replacement pairs.
 *
 * @param {string|Array<[string, string]>} dictOrString either an array of
 *   [source, target] pairs, or a string of "source target" segments separated
 *   by "|". Any other input yields an identity converter.
 * @returns {(text: string) => string}
 *
 * The text is scanned once from left to right and the longest source matching
 * at the current position is applied. Replaced output is never rescanned, so
 * rules cannot rewrite each other's results (an a<->b swap works). Entries
 * with an empty or non-string source, or a null/undefined target, are
 * ignored. The caller's array is not modified. For duplicate sources the
 * first listed entry wins.
 */
function CustomConverter(dictOrString) {
  let entries = [];
  if (typeof dictOrString === "string") {
    entries = dictOrString
      .split("|")
      .map((seg) => seg.trim())
      .filter(Boolean)
      .map((seg) => seg.split(/\s+/))
      .filter((parts) => parts.length >= 2);
  } else if (Array.isArray(dictOrString)) {
    entries = dictOrString;
  }

  // Bucket rules by the first UTF-16 code unit of the source so each text
  // position only inspects rules that can possibly match there.
  const buckets = new Map();
  for (const entry of entries) {
    if (!Array.isArray(entry)) continue;
    const [src, dst] = entry;
    if (typeof src !== "string" || src.length === 0 || dst == null) continue;
    const bucket = buckets.get(src[0]);
    if (bucket) bucket.push([src, String(dst)]);
    else buckets.set(src[0], [[src, String(dst)]]);
  }
  // Longest source first within each bucket.
  for (const bucket of buckets.values()) {
    bucket.sort((a, b) => b[0].length - a[0].length);
  }

  return (text) => {
    if (typeof text !== "string") {
      throw new TypeError("CustomConverter expects a string");
    }
    let out = "";
    let i = 0;
    while (i < text.length) {
      const bucket = buckets.get(text[i]);
      const hit = bucket && bucket.find(([src]) => text.startsWith(src, i));
      if (hit) {
        out += hit[1];
        i += hit[0].length;
      } else {
        out += text[i];
        i += 1;
      }
    }
    return out;
  };
}

/**
 * Build an async converter for a locale pair, then apply each extra custom
 * dictionary (same formats as CustomConverter) to the result, in order.
 * @returns {(text: string) => Promise<string>}
 */
function ConverterFactory(fromLocale, toLocale, extraDicts = []) {
  const conv = createConverter({ from: fromLocale, to: toLocale });
  const extras = (extraDicts || []).map((d) => CustomConverter(d));
  return async (text) => {
    const converted = await conv(text);
    return extras.reduce((acc, fn) => fn(acc), converted);
  };
}

export const OpenCC = {
  /** Create an async converter from `{ from, to }` or `{ config }`. */
  Converter(opts) {
    const fn = createConverter(opts);
    return (text) => fn(text);
  },
  CustomConverter,
  ConverterFactory,
  Locale: {
    // NOTE(review): `from.tw` maps to "t" while `to.tw` maps to "tw", so
    // Locale.from.tw never selects the CONFIG_MAP.tw presets (tw2s/tw2t).
    // Confirm whether this asymmetry is intentional.
    from: { cn: "cn", tw: "t", hk: "hk", jp: "jp", t: "t" },
    to: { cn: "cn", tw: "tw", hk: "hk", jp: "jp", t: "t" },
  },
};

export default OpenCC;
ret};if(process.argv.length>1){thisProgram=process.argv[1].replace(/\\/g,"/")}arguments_=process.argv.slice(2);quit_=(status,toThrow)=>{process.exitCode=status;throw toThrow}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){try{scriptDirectory=new URL(".",_scriptName).href}catch{}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=async url=>{if(isFileURI(url)){return new Promise((resolve,reject)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response);return}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}var response=await fetch(url,{credentials:"same-origin"});if(response.ok){return response.arrayBuffer()}throw new Error(response.status+" : "+response.url)}}}else{}var out=console.log.bind(console);var err=console.error.bind(console);var wasmBinary;var ABORT=false;var isFileURI=filename=>filename.startsWith("file://");var readyPromiseResolve,readyPromiseReject;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;var HEAP64,HEAPU64;var runtimeInitialized=false;function updateMemoryViews(){var b=wasmMemory.buffer;HEAP8=new Int8Array(b);HEAP16=new Int16Array(b);HEAPU8=new Uint8Array(b);HEAPU16=new Uint16Array(b);HEAP32=new Int32Array(b);HEAPU32=new Uint32Array(b);HEAPF32=new Float32Array(b);HEAPF64=new Float64Array(b);HEAP64=new BigInt64Array(b);HEAPU64=new BigUint64Array(b)}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(onPreRuns)}function initRuntime(){runtimeInitialized=true;if(!Module["noFSInit"]&&!FS.initialized)FS.init();TTY.init();wasmExports["__wasm_call_ctors"]();FS.ignorePermissions=false}function 
postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(onPostRuns)}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;what+=". Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject?.(e);throw e}var wasmBinaryFile;function findWasmBinary(){if(Module["locateFile"]){return locateFile("opencc-wasm.esm.wasm")}return new URL("opencc-wasm.esm.wasm",import.meta.url).href}function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}async function getWasmBinary(binaryFile){if(!wasmBinary){try{var response=await readAsync(binaryFile);return new Uint8Array(response)}catch{}}return getBinarySync(binaryFile)}async function instantiateArrayBuffer(binaryFile,imports){try{var binary=await getWasmBinary(binaryFile);var instance=await WebAssembly.instantiate(binary,imports);return instance}catch(reason){err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)}}async function instantiateAsync(binary,binaryFile,imports){if(!binary&&!isFileURI(binaryFile)&&!ENVIRONMENT_IS_NODE){try{var response=fetch(binaryFile,{credentials:"same-origin"});var instantiationResult=await WebAssembly.instantiateStreaming(response,imports);return instantiationResult}catch(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation")}}return instantiateArrayBuffer(binaryFile,imports)}function getWasmImports(){var imports={env:wasmImports,wasi_snapshot_preview1:wasmImports};return imports}async function createWasm(){function receiveInstance(instance,module){wasmExports=instance.exports;assignWasmExports(wasmExports);updateMemoryViews();return wasmExports}function 
receiveInstantiationResult(result){return receiveInstance(result["instance"])}var info=getWasmImports();if(Module["instantiateWasm"]){return new Promise((resolve,reject)=>{Module["instantiateWasm"](info,(inst,mod)=>{resolve(receiveInstance(inst,mod))})})}wasmBinaryFile??=findWasmBinary();var result=await instantiateAsync(wasmBinary,wasmBinaryFile,info);var exports=receiveInstantiationResult(result);return exports}class ExitStatus{name="ExitStatus";constructor(status){this.message=`Program terminated with exit(${status})`;this.status=status}}var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var onPostRuns=[];var addOnPostRun=cb=>onPostRuns.push(cb);var onPreRuns=[];var addOnPreRun=cb=>onPreRuns.push(cb);var noExitRuntime=true;var stackRestore=val=>__emscripten_stack_restore(val);var stackSave=()=>_emscripten_stack_get_current();var UTF8Decoder=globalThis.TextDecoder&&new TextDecoder;var findStringEnd=(heapOrArray,idx,maxBytesToRead,ignoreNul)=>{var maxIdx=idx+maxBytesToRead;if(ignoreNul)return maxIdx;while(heapOrArray[idx]&&!(idx>=maxIdx))++idx;return idx};var UTF8ArrayToString=(heapOrArray,idx=0,maxBytesToRead,ignoreNul)=>{var endPtr=findStringEnd(heapOrArray,idx,maxBytesToRead,ignoreNul);if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead,ignoreNul)=>ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead,ignoreNul):"";var ___assert_fail=(condition,filename,line,func)=>abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"]);var exceptionCaught=[];var uncaughtExceptionCount=0;var ___cxa_begin_catch=ptr=>{var info=new 
ExceptionInfo(ptr);if(!info.get_caught()){info.set_caught(true);uncaughtExceptionCount--}info.set_rethrown(false);exceptionCaught.push(info);___cxa_increment_exception_refcount(ptr);return ___cxa_get_exception_ptr(ptr)};var exceptionLast=0;var ___cxa_end_catch=()=>{_setThrew(0,0);var info=exceptionCaught.pop();___cxa_decrement_exception_refcount(info.excPtr);exceptionLast=0};class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}set_type(type){HEAPU32[this.ptr+4>>2]=type}get_type(){return HEAPU32[this.ptr+4>>2]}set_destructor(destructor){HEAPU32[this.ptr+8>>2]=destructor}get_destructor(){return HEAPU32[this.ptr+8>>2]}set_caught(caught){caught=caught?1:0;HEAP8[this.ptr+12]=caught}get_caught(){return HEAP8[this.ptr+12]!=0}set_rethrown(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13]=rethrown}get_rethrown(){return HEAP8[this.ptr+13]!=0}init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor)}set_adjusted_ptr(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr}get_adjusted_ptr(){return HEAPU32[this.ptr+16>>2]}}var setTempRet0=val=>__emscripten_tempret_set(val);var findMatchingCatch=args=>{var thrown=exceptionLast;if(!thrown){setTempRet0(0);return 0}var info=new ExceptionInfo(thrown);info.set_adjusted_ptr(thrown);var thrownType=info.get_type();if(!thrownType){setTempRet0(0);return thrown}for(var caughtType of args){if(caughtType===0||caughtType===thrownType){break}var adjusted_ptr_addr=info.ptr+16;if(___cxa_can_catch(caughtType,thrownType,adjusted_ptr_addr)){setTempRet0(caughtType);return thrown}}setTempRet0(thrownType);return thrown};var ___cxa_find_matching_catch_2=()=>findMatchingCatch([]);var ___cxa_find_matching_catch_3=arg0=>findMatchingCatch([arg0]);var ___cxa_rethrow=()=>{var info=exceptionCaught.pop();if(!info){abort("no exception to throw")}var 
ptr=info.excPtr;if(!info.get_rethrown()){exceptionCaught.push(info);info.set_rethrown(true);info.set_caught(false);uncaughtExceptionCount++}exceptionLast=ptr;throw exceptionLast};var ___cxa_throw=(ptr,type,destructor)=>{var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw exceptionLast};var ___cxa_uncaught_exceptions=()=>uncaughtExceptionCount;var ___resumeException=ptr=>{if(!exceptionLast){exceptionLast=ptr}throw exceptionLast};var syscallGetVarargI=()=>{var ret=HEAP32[+SYSCALLS.varargs>>2];SYSCALLS.varargs+=4;return ret};var syscallGetVarargP=syscallGetVarargI;var PATH={isAbs:path=>path.charAt(0)==="/",splitPath:filename=>{var splitPathRe=/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;return splitPathRe.exec(filename).slice(1)},normalizeArray:(parts,allowAboveRoot)=>{var up=0;for(var i=parts.length-1;i>=0;i--){var last=parts[i];if(last==="."){parts.splice(i,1)}else if(last===".."){parts.splice(i,1);up++}else if(up){parts.splice(i,1);up--}}if(allowAboveRoot){for(;up;up--){parts.unshift("..")}}return parts},normalize:path=>{var isAbsolute=PATH.isAbs(path),trailingSlash=path.slice(-1)==="/";path=PATH.normalizeArray(path.split("/").filter(p=>!!p),!isAbsolute).join("/");if(!path&&!isAbsolute){path="."}if(path&&trailingSlash){path+="/"}return(isAbsolute?"/":"")+path},dirname:path=>{var result=PATH.splitPath(path),root=result[0],dir=result[1];if(!root&&!dir){return"."}if(dir){dir=dir.slice(0,-1)}return root+dir},basename:path=>path&&path.match(/([^\/]+|\/)\/*$/)[1],join:(...paths)=>PATH.normalize(paths.join("/")),join2:(l,r)=>PATH.normalize(l+"/"+r)};var initRandomFill=()=>{if(ENVIRONMENT_IS_NODE){var nodeCrypto=require("crypto");return view=>nodeCrypto.randomFillSync(view)}return view=>crypto.getRandomValues(view)};var randomFill=view=>{(randomFill=initRandomFill())(view)};var PATH_FS={resolve:(...args)=>{var resolvedPath="",resolvedAbsolute=false;for(var 
i=args.length-1;i>=-1&&!resolvedAbsolute;i--){var path=i>=0?args[i]:FS.cwd();if(typeof path!="string"){throw new TypeError("Arguments to path.resolve must be strings")}else if(!path){return""}resolvedPath=path+"/"+resolvedPath;resolvedAbsolute=PATH.isAbs(path)}resolvedPath=PATH.normalizeArray(resolvedPath.split("/").filter(p=>!!p),!resolvedAbsolute).join("/");return(resolvedAbsolute?"/":"")+resolvedPath||"."},relative:(from,to)=>{from=PATH_FS.resolve(from).slice(1);to=PATH_FS.resolve(to).slice(1);function trim(arr){var start=0;for(;start=0;end--){if(arr[end]!=="")break}if(start>end)return[];return arr.slice(start,end-start+1)}var fromParts=trim(from.split("/"));var toParts=trim(to.split("/"));var length=Math.min(fromParts.length,toParts.length);var samePartsLength=length;for(var i=0;i{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63;i++}}heap[outIdx]=0;return outIdx-startIdx};var intArrayFromString=(stringy,dontAddNull,length)=>{var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array};var FS_stdin_getChar=()=>{if(!FS_stdin_getChar_buffer.length){var result=null;if(ENVIRONMENT_IS_NODE){var BUFSIZE=256;var buf=Buffer.alloc(BUFSIZE);var bytesRead=0;var 
fd=process.stdin.fd;try{bytesRead=fs.readSync(fd,buf,0,BUFSIZE)}catch(e){if(e.toString().includes("EOF"))bytesRead=0;else throw e}if(bytesRead>0){result=buf.slice(0,bytesRead).toString("utf-8")}}else if(globalThis.window?.prompt){result=window.prompt("Input: ");if(result!==null){result+="\n"}}else{}if(!result){return null}FS_stdin_getChar_buffer=intArrayFromString(result,true)}return FS_stdin_getChar_buffer.shift()};var TTY={ttys:[],init(){},shutdown(){},register(dev,ops){TTY.ttys[dev]={input:[],output:[],ops};FS.registerDevice(dev,TTY.stream_ops)},stream_ops:{open(stream){var tty=TTY.ttys[stream.node.rdev];if(!tty){throw new FS.ErrnoError(43)}stream.tty=tty;stream.seekable=false},close(stream){stream.tty.ops.fsync(stream.tty)},fsync(stream){stream.tty.ops.fsync(stream.tty)},read(stream,buffer,offset,length,pos){if(!stream.tty||!stream.tty.ops.get_char){throw new FS.ErrnoError(60)}var bytesRead=0;for(var i=0;i0){out(UTF8ArrayToString(tty.output));tty.output=[]}},ioctl_tcgets(tty){return{c_iflag:25856,c_oflag:5,c_cflag:191,c_lflag:35387,c_cc:[3,28,127,21,4,0,1,0,17,19,26,0,18,15,23,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}},ioctl_tcsets(tty,optional_actions,data){return 0},ioctl_tiocgwinsz(tty){return[24,80]}},default_tty1_ops:{put_char(tty,val){if(val===null||val===10){err(UTF8ArrayToString(tty.output));tty.output=[]}else{if(val!=0)tty.output.push(val)}},fsync(tty){if(tty.output?.length>0){err(UTF8ArrayToString(tty.output));tty.output=[]}}}};var zeroMemory=(ptr,size)=>HEAPU8.fill(0,ptr,ptr+size);var alignMemory=(size,alignment)=>Math.ceil(size/alignment)*alignment;var mmapAlloc=size=>{size=alignMemory(size,65536);var ptr=_emscripten_builtin_memalign(65536,size);if(ptr)zeroMemory(ptr,size);return ptr};var MEMFS={ops_table:null,mount(mount){return MEMFS.createNode(null,"/",16895,0)},createNode(parent,name,mode,dev){if(FS.isBlkdev(mode)||FS.isFIFO(mode)){throw new 
FS.ErrnoError(63)}MEMFS.ops_table||={dir:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,lookup:MEMFS.node_ops.lookup,mknod:MEMFS.node_ops.mknod,rename:MEMFS.node_ops.rename,unlink:MEMFS.node_ops.unlink,rmdir:MEMFS.node_ops.rmdir,readdir:MEMFS.node_ops.readdir,symlink:MEMFS.node_ops.symlink},stream:{llseek:MEMFS.stream_ops.llseek}},file:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:{llseek:MEMFS.stream_ops.llseek,read:MEMFS.stream_ops.read,write:MEMFS.stream_ops.write,mmap:MEMFS.stream_ops.mmap,msync:MEMFS.stream_ops.msync}},link:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr,readlink:MEMFS.node_ops.readlink},stream:{}},chrdev:{node:{getattr:MEMFS.node_ops.getattr,setattr:MEMFS.node_ops.setattr},stream:FS.chrdev_stream_ops}};var node=FS.createNode(parent,name,mode,dev);if(FS.isDir(node.mode)){node.node_ops=MEMFS.ops_table.dir.node;node.stream_ops=MEMFS.ops_table.dir.stream;node.contents={}}else if(FS.isFile(node.mode)){node.node_ops=MEMFS.ops_table.file.node;node.stream_ops=MEMFS.ops_table.file.stream;node.usedBytes=0;node.contents=null}else if(FS.isLink(node.mode)){node.node_ops=MEMFS.ops_table.link.node;node.stream_ops=MEMFS.ops_table.link.stream}else if(FS.isChrdev(node.mode)){node.node_ops=MEMFS.ops_table.chrdev.node;node.stream_ops=MEMFS.ops_table.chrdev.stream}node.atime=node.mtime=node.ctime=Date.now();if(parent){parent.contents[name]=node;parent.atime=parent.mtime=parent.ctime=node.atime}return node},getFileDataAsTypedArray(node){if(!node.contents)return new Uint8Array(0);if(node.contents.subarray)return node.contents.subarray(0,node.usedBytes);return new Uint8Array(node.contents)},expandFileStorage(node,newCapacity){var prevCapacity=node.contents?node.contents.length:0;if(prevCapacity>=newCapacity)return;var CAPACITY_DOUBLING_MAX=1024*1024;newCapacity=Math.max(newCapacity,prevCapacity*(prevCapacity>>0);if(prevCapacity!=0)newCapacity=Math.max(newCapacity,256);var 
oldContents=node.contents;node.contents=new Uint8Array(newCapacity);if(node.usedBytes>0)node.contents.set(oldContents.subarray(0,node.usedBytes),0)},resizeFileStorage(node,newSize){if(node.usedBytes==newSize)return;if(newSize==0){node.contents=null;node.usedBytes=0}else{var oldContents=node.contents;node.contents=new Uint8Array(newSize);if(oldContents){node.contents.set(oldContents.subarray(0,Math.min(newSize,node.usedBytes)))}node.usedBytes=newSize}},node_ops:{getattr(node){var attr={};attr.dev=FS.isChrdev(node.mode)?node.id:1;attr.ino=node.id;attr.mode=node.mode;attr.nlink=1;attr.uid=0;attr.gid=0;attr.rdev=node.rdev;if(FS.isDir(node.mode)){attr.size=4096}else if(FS.isFile(node.mode)){attr.size=node.usedBytes}else if(FS.isLink(node.mode)){attr.size=node.link.length}else{attr.size=0}attr.atime=new Date(node.atime);attr.mtime=new Date(node.mtime);attr.ctime=new Date(node.ctime);attr.blksize=4096;attr.blocks=Math.ceil(attr.size/attr.blksize);return attr},setattr(node,attr){for(const key of["mode","atime","mtime","ctime"]){if(attr[key]!=null){node[key]=attr[key]}}if(attr.size!==undefined){MEMFS.resizeFileStorage(node,attr.size)}},lookup(parent,name){if(!MEMFS.doesNotExistError){MEMFS.doesNotExistError=new FS.ErrnoError(44);MEMFS.doesNotExistError.stack=""}throw MEMFS.doesNotExistError},mknod(parent,name,mode,dev){return MEMFS.createNode(parent,name,mode,dev)},rename(old_node,new_dir,new_name){var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(new_node){if(FS.isDir(old_node.mode)){for(var i in new_node.contents){throw new FS.ErrnoError(55)}}FS.hashRemoveNode(new_node)}delete old_node.parent.contents[old_node.name];new_dir.contents[new_name]=old_node;old_node.name=new_name;new_dir.ctime=new_dir.mtime=old_node.parent.ctime=old_node.parent.mtime=Date.now()},unlink(parent,name){delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},rmdir(parent,name){var node=FS.lookupNode(parent,name);for(var i in node.contents){throw new 
FS.ErrnoError(55)}delete parent.contents[name];parent.ctime=parent.mtime=Date.now()},readdir(node){return[".","..",...Object.keys(node.contents)]},symlink(parent,newname,oldpath){var node=MEMFS.createNode(parent,newname,511|40960,0);node.link=oldpath;return node},readlink(node){if(!FS.isLink(node.mode)){throw new FS.ErrnoError(28)}return node.link}},stream_ops:{read(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=stream.node.usedBytes)return 0;var size=Math.min(stream.node.usedBytes-position,length);if(size>8&&contents.subarray){buffer.set(contents.subarray(position,position+size),offset)}else{for(var i=0;i0||position+length{var flagModes={r:0,"r+":2,w:512|64|1,"w+":512|64|2,a:1024|64|1,"a+":1024|64|2};var flags=flagModes[str];if(typeof flags=="undefined"){throw new Error(`Unknown file open mode: ${str}`)}return flags};var FS_getMode=(canRead,canWrite)=>{var mode=0;if(canRead)mode|=292|73;if(canWrite)mode|=146;return mode};var asyncLoad=async url=>{var arrayBuffer=await readAsync(url);return new Uint8Array(arrayBuffer)};var FS_createDataFile=(...args)=>FS.createDataFile(...args);var getUniqueRunDependency=id=>id;var runDependencies=0;var dependenciesFulfilled=null;var removeRunDependency=id=>{runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}};var addRunDependency=id=>{runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)};var preloadPlugins=[];var FS_handledByPreloadPlugin=async(byteArray,fullname)=>{if(typeof Browser!="undefined")Browser.init();for(var plugin of preloadPlugins){if(plugin["canHandle"](fullname)){return plugin["handle"](byteArray,fullname)}}return byteArray};var FS_preloadFile=async(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish)=>{var fullname=name?PATH_FS.resolve(PATH.join2(parent,name)):parent;var dep=getUniqueRunDependency(`cp 
${fullname}`);addRunDependency(dep);try{var byteArray=url;if(typeof url=="string"){byteArray=await asyncLoad(url)}byteArray=await FS_handledByPreloadPlugin(byteArray,fullname);preFinish?.();if(!dontCreateFile){FS_createDataFile(parent,name,byteArray,canRead,canWrite,canOwn)}}finally{removeRunDependency(dep)}};var FS_createPreloadedFile=(parent,name,url,canRead,canWrite,onload,onerror,dontCreateFile,canOwn,preFinish)=>{FS_preloadFile(parent,name,url,canRead,canWrite,dontCreateFile,canOwn,preFinish).then(onload).catch(onerror)};var FS={root:null,mounts:[],devices:{},streams:[],nextInode:1,nameTable:null,currentPath:"/",initialized:false,ignorePermissions:true,filesystems:null,syncFSRequests:0,readFiles:{},ErrnoError:class{name="ErrnoError";constructor(errno){this.errno=errno}},FSStream:class{shared={};get object(){return this.node}set object(val){this.node=val}get isRead(){return(this.flags&2097155)!==1}get isWrite(){return(this.flags&2097155)!==0}get isAppend(){return this.flags&1024}get flags(){return this.shared.flags}set flags(val){this.shared.flags=val}get position(){return this.shared.position}set position(val){this.shared.position=val}},FSNode:class{node_ops={};stream_ops={};readMode=292|73;writeMode=146;mounted=null;constructor(parent,name,mode,rdev){if(!parent){parent=this}this.parent=parent;this.mount=parent.mount;this.id=FS.nextInode++;this.name=name;this.mode=mode;this.rdev=rdev;this.atime=this.mtime=this.ctime=Date.now()}get read(){return(this.mode&this.readMode)===this.readMode}set read(val){val?this.mode|=this.readMode:this.mode&=~this.readMode}get write(){return(this.mode&this.writeMode)===this.writeMode}set write(val){val?this.mode|=this.writeMode:this.mode&=~this.writeMode}get isFolder(){return FS.isDir(this.mode)}get isDevice(){return FS.isChrdev(this.mode)}},lookupPath(path,opts={}){if(!path){throw new FS.ErrnoError(44)}opts.follow_mount??=true;if(!PATH.isAbs(path)){path=FS.cwd()+"/"+path}linkloop:for(var nlinks=0;nlinks<40;nlinks++){var 
parts=path.split("/").filter(p=>!!p);var current=FS.root;var current_path="/";for(var i=0;i>>0)%FS.nameTable.length},hashAddNode(node){var hash=FS.hashName(node.parent.id,node.name);node.name_next=FS.nameTable[hash];FS.nameTable[hash]=node},hashRemoveNode(node){var hash=FS.hashName(node.parent.id,node.name);if(FS.nameTable[hash]===node){FS.nameTable[hash]=node.name_next}else{var current=FS.nameTable[hash];while(current){if(current.name_next===node){current.name_next=node.name_next;break}current=current.name_next}}},lookupNode(parent,name){var errCode=FS.mayLookup(parent);if(errCode){throw new FS.ErrnoError(errCode)}var hash=FS.hashName(parent.id,name);for(var node=FS.nameTable[hash];node;node=node.name_next){var nodeName=node.name;if(node.parent.id===parent.id&&nodeName===name){return node}}return FS.lookup(parent,name)},createNode(parent,name,mode,rdev){var node=new FS.FSNode(parent,name,mode,rdev);FS.hashAddNode(node);return node},destroyNode(node){FS.hashRemoveNode(node)},isRoot(node){return node===node.parent},isMountpoint(node){return!!node.mounted},isFile(mode){return(mode&61440)===32768},isDir(mode){return(mode&61440)===16384},isLink(mode){return(mode&61440)===40960},isChrdev(mode){return(mode&61440)===8192},isBlkdev(mode){return(mode&61440)===24576},isFIFO(mode){return(mode&61440)===4096},isSocket(mode){return(mode&49152)===49152},flagsToPermissionString(flag){var perms=["r","w","rw"][flag&3];if(flag&512){perms+="w"}return perms},nodePermissions(node,perms){if(FS.ignorePermissions){return 0}if(perms.includes("r")&&!(node.mode&292)){return 2}else if(perms.includes("w")&&!(node.mode&146)){return 2}else if(perms.includes("x")&&!(node.mode&73)){return 2}return 0},mayLookup(dir){if(!FS.isDir(dir.mode))return 54;var errCode=FS.nodePermissions(dir,"x");if(errCode)return errCode;if(!dir.node_ops.lookup)return 2;return 0},mayCreate(dir,name){if(!FS.isDir(dir.mode)){return 54}try{var node=FS.lookupNode(dir,name);return 20}catch(e){}return 
FS.nodePermissions(dir,"wx")},mayDelete(dir,name,isdir){var node;try{node=FS.lookupNode(dir,name)}catch(e){return e.errno}var errCode=FS.nodePermissions(dir,"wx");if(errCode){return errCode}if(isdir){if(!FS.isDir(node.mode)){return 54}if(FS.isRoot(node)||FS.getPath(node)===FS.cwd()){return 10}}else{if(FS.isDir(node.mode)){return 31}}return 0},mayOpen(node,flags){if(!node){return 44}if(FS.isLink(node.mode)){return 32}else if(FS.isDir(node.mode)){if(FS.flagsToPermissionString(flags)!=="r"||flags&(512|64)){return 31}}return FS.nodePermissions(node,FS.flagsToPermissionString(flags))},checkOpExists(op,err){if(!op){throw new FS.ErrnoError(err)}return op},MAX_OPEN_FDS:4096,nextfd(){for(var fd=0;fd<=FS.MAX_OPEN_FDS;fd++){if(!FS.streams[fd]){return fd}}throw new FS.ErrnoError(33)},getStreamChecked(fd){var stream=FS.getStream(fd);if(!stream){throw new FS.ErrnoError(8)}return stream},getStream:fd=>FS.streams[fd],createStream(stream,fd=-1){stream=Object.assign(new FS.FSStream,stream);if(fd==-1){fd=FS.nextfd()}stream.fd=fd;FS.streams[fd]=stream;return stream},closeStream(fd){FS.streams[fd]=null},dupStream(origStream,fd=-1){var stream=FS.createStream(origStream,fd);stream.stream_ops?.dup?.(stream);return stream},doSetAttr(stream,node,attr){var setattr=stream?.stream_ops.setattr;var arg=setattr?stream:node;setattr??=node.node_ops.setattr;FS.checkOpExists(setattr,63);setattr(arg,attr)},chrdev_stream_ops:{open(stream){var device=FS.getDevice(stream.node.rdev);stream.stream_ops=device.stream_ops;stream.stream_ops.open?.(stream)},llseek(){throw new FS.ErrnoError(70)}},major:dev=>dev>>8,minor:dev=>dev&255,makedev:(ma,mi)=>ma<<8|mi,registerDevice(dev,ops){FS.devices[dev]={stream_ops:ops}},getDevice:dev=>FS.devices[dev],getMounts(mount){var mounts=[];var check=[mount];while(check.length){var m=check.pop();mounts.push(m);check.push(...m.mounts)}return mounts},syncfs(populate,callback){if(typeof 
populate=="function"){callback=populate;populate=false}FS.syncFSRequests++;if(FS.syncFSRequests>1){err(`warning: ${FS.syncFSRequests} FS.syncfs operations in flight at once, probably just doing extra work`)}var mounts=FS.getMounts(FS.root.mount);var completed=0;function doCallback(errCode){FS.syncFSRequests--;return callback(errCode)}function done(errCode){if(errCode){if(!done.errored){done.errored=true;return doCallback(errCode)}return}if(++completed>=mounts.length){doCallback(null)}}for(var mount of mounts){if(mount.type.syncfs){mount.type.syncfs(mount,populate,done)}else{done(null)}}},mount(type,opts,mountpoint){var root=mountpoint==="/";var pseudo=!mountpoint;var node;if(root&&FS.root){throw new FS.ErrnoError(10)}else if(!root&&!pseudo){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});mountpoint=lookup.path;node=lookup.node;if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}if(!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}}var mount={type,opts,mountpoint,mounts:[]};var mountRoot=type.mount(mount);mountRoot.mount=mount;mount.root=mountRoot;if(root){FS.root=mountRoot}else if(node){node.mounted=mount;if(node.mount){node.mount.mounts.push(mount)}}return mountRoot},unmount(mountpoint){var lookup=FS.lookupPath(mountpoint,{follow_mount:false});if(!FS.isMountpoint(lookup.node)){throw new FS.ErrnoError(28)}var node=lookup.node;var mount=node.mounted;var mounts=FS.getMounts(mount);for(var[hash,current]of Object.entries(FS.nameTable)){while(current){var next=current.name_next;if(mounts.includes(current.mount)){FS.destroyNode(current)}current=next}}node.mounted=null;var idx=node.mount.mounts.indexOf(mount);node.mount.mounts.splice(idx,1)},lookup(parent,name){return parent.node_ops.lookup(parent,name)},mknod(path,mode,dev){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);if(!name){throw new FS.ErrnoError(28)}if(name==="."||name===".."){throw new FS.ErrnoError(20)}var 
errCode=FS.mayCreate(parent,name);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.mknod){throw new FS.ErrnoError(63)}return parent.node_ops.mknod(parent,name,mode,dev)},statfs(path){return FS.statfsNode(FS.lookupPath(path,{follow:true}).node)},statfsStream(stream){return FS.statfsNode(stream.node)},statfsNode(node){var rtn={bsize:4096,frsize:4096,blocks:1e6,bfree:5e5,bavail:5e5,files:FS.nextInode,ffree:FS.nextInode-1,fsid:42,flags:2,namelen:255};if(node.node_ops.statfs){Object.assign(rtn,node.node_ops.statfs(node.mount.opts.root))}return rtn},create(path,mode=438){mode&=4095;mode|=32768;return FS.mknod(path,mode,0)},mkdir(path,mode=511){mode&=511|512;mode|=16384;return FS.mknod(path,mode,0)},mkdirTree(path,mode){var dirs=path.split("/");var d="";for(var dir of dirs){if(!dir)continue;if(d||PATH.isAbs(path))d+="/";d+=dir;try{FS.mkdir(d,mode)}catch(e){if(e.errno!=20)throw e}}},mkdev(path,mode,dev){if(typeof dev=="undefined"){dev=mode;mode=438}mode|=8192;return FS.mknod(path,mode,dev)},symlink(oldpath,newpath){if(!PATH_FS.resolve(oldpath)){throw new FS.ErrnoError(44)}var lookup=FS.lookupPath(newpath,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var newname=PATH.basename(newpath);var errCode=FS.mayCreate(parent,newname);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.symlink){throw new FS.ErrnoError(63)}return parent.node_ops.symlink(parent,newname,oldpath)},rename(old_path,new_path){var old_dirname=PATH.dirname(old_path);var new_dirname=PATH.dirname(new_path);var old_name=PATH.basename(old_path);var new_name=PATH.basename(new_path);var lookup,old_dir,new_dir;lookup=FS.lookupPath(old_path,{parent:true});old_dir=lookup.node;lookup=FS.lookupPath(new_path,{parent:true});new_dir=lookup.node;if(!old_dir||!new_dir)throw new FS.ErrnoError(44);if(old_dir.mount!==new_dir.mount){throw new FS.ErrnoError(75)}var old_node=FS.lookupNode(old_dir,old_name);var 
relative=PATH_FS.relative(old_path,new_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(28)}relative=PATH_FS.relative(new_path,old_dirname);if(relative.charAt(0)!=="."){throw new FS.ErrnoError(55)}var new_node;try{new_node=FS.lookupNode(new_dir,new_name)}catch(e){}if(old_node===new_node){return}var isdir=FS.isDir(old_node.mode);var errCode=FS.mayDelete(old_dir,old_name,isdir);if(errCode){throw new FS.ErrnoError(errCode)}errCode=new_node?FS.mayDelete(new_dir,new_name,isdir):FS.mayCreate(new_dir,new_name);if(errCode){throw new FS.ErrnoError(errCode)}if(!old_dir.node_ops.rename){throw new FS.ErrnoError(63)}if(FS.isMountpoint(old_node)||new_node&&FS.isMountpoint(new_node)){throw new FS.ErrnoError(10)}if(new_dir!==old_dir){errCode=FS.nodePermissions(old_dir,"w");if(errCode){throw new FS.ErrnoError(errCode)}}FS.hashRemoveNode(old_node);try{old_dir.node_ops.rename(old_node,new_dir,new_name);old_node.parent=new_dir}catch(e){throw e}finally{FS.hashAddNode(old_node)}},rmdir(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,true);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.rmdir){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new FS.ErrnoError(10)}parent.node_ops.rmdir(parent,name);FS.destroyNode(node)},readdir(path){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var readdir=FS.checkOpExists(node.node_ops.readdir,54);return readdir(node)},unlink(path){var lookup=FS.lookupPath(path,{parent:true});var parent=lookup.node;if(!parent){throw new FS.ErrnoError(44)}var name=PATH.basename(path);var node=FS.lookupNode(parent,name);var errCode=FS.mayDelete(parent,name,false);if(errCode){throw new FS.ErrnoError(errCode)}if(!parent.node_ops.unlink){throw new FS.ErrnoError(63)}if(FS.isMountpoint(node)){throw new 
FS.ErrnoError(10)}parent.node_ops.unlink(parent,name);FS.destroyNode(node)},readlink(path){var lookup=FS.lookupPath(path);var link=lookup.node;if(!link){throw new FS.ErrnoError(44)}if(!link.node_ops.readlink){throw new FS.ErrnoError(28)}return link.node_ops.readlink(link)},stat(path,dontFollow){var lookup=FS.lookupPath(path,{follow:!dontFollow});var node=lookup.node;var getattr=FS.checkOpExists(node.node_ops.getattr,63);return getattr(node)},fstat(fd){var stream=FS.getStreamChecked(fd);var node=stream.node;var getattr=stream.stream_ops.getattr;var arg=getattr?stream:node;getattr??=node.node_ops.getattr;FS.checkOpExists(getattr,63);return getattr(arg)},lstat(path){return FS.stat(path,true)},doChmod(stream,node,mode,dontFollow){FS.doSetAttr(stream,node,{mode:mode&4095|node.mode&~4095,ctime:Date.now(),dontFollow})},chmod(path,mode,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChmod(null,node,mode,dontFollow)},lchmod(path,mode){FS.chmod(path,mode,true)},fchmod(fd,mode){var stream=FS.getStreamChecked(fd);FS.doChmod(stream,stream.node,mode,false)},doChown(stream,node,dontFollow){FS.doSetAttr(stream,node,{timestamp:Date.now(),dontFollow})},chown(path,uid,gid,dontFollow){var node;if(typeof path=="string"){var lookup=FS.lookupPath(path,{follow:!dontFollow});node=lookup.node}else{node=path}FS.doChown(null,node,dontFollow)},lchown(path,uid,gid){FS.chown(path,uid,gid,true)},fchown(fd,uid,gid){var stream=FS.getStreamChecked(fd);FS.doChown(stream,stream.node,false)},doTruncate(stream,node,len){if(FS.isDir(node.mode)){throw new FS.ErrnoError(31)}if(!FS.isFile(node.mode)){throw new FS.ErrnoError(28)}var errCode=FS.nodePermissions(node,"w");if(errCode){throw new FS.ErrnoError(errCode)}FS.doSetAttr(stream,node,{size:len,timestamp:Date.now()})},truncate(path,len){if(len<0){throw new FS.ErrnoError(28)}var node;if(typeof path=="string"){var 
lookup=FS.lookupPath(path,{follow:true});node=lookup.node}else{node=path}FS.doTruncate(null,node,len)},ftruncate(fd,len){var stream=FS.getStreamChecked(fd);if(len<0||(stream.flags&2097155)===0){throw new FS.ErrnoError(28)}FS.doTruncate(stream,stream.node,len)},utime(path,atime,mtime){var lookup=FS.lookupPath(path,{follow:true});var node=lookup.node;var setattr=FS.checkOpExists(node.node_ops.setattr,63);setattr(node,{atime,mtime})},open(path,flags,mode=438){if(path===""){throw new FS.ErrnoError(44)}flags=typeof flags=="string"?FS_modeStringToFlags(flags):flags;if(flags&64){mode=mode&4095|32768}else{mode=0}var node;var isDirPath;if(typeof path=="object"){node=path}else{isDirPath=path.endsWith("/");var lookup=FS.lookupPath(path,{follow:!(flags&131072),noent_okay:true});node=lookup.node;path=lookup.path}var created=false;if(flags&64){if(node){if(flags&128){throw new FS.ErrnoError(20)}}else if(isDirPath){throw new FS.ErrnoError(31)}else{node=FS.mknod(path,mode|511,0);created=true}}if(!node){throw new FS.ErrnoError(44)}if(FS.isChrdev(node.mode)){flags&=~512}if(flags&65536&&!FS.isDir(node.mode)){throw new FS.ErrnoError(54)}if(!created){var errCode=FS.mayOpen(node,flags);if(errCode){throw new FS.ErrnoError(errCode)}}if(flags&512&&!created){FS.truncate(node,0)}flags&=~(128|512|131072);var stream=FS.createStream({node,path:FS.getPath(node),flags,seekable:true,position:0,stream_ops:node.stream_ops,ungotten:[],error:false});if(stream.stream_ops.open){stream.stream_ops.open(stream)}if(created){FS.chmod(node,mode&511)}if(Module["logReadFiles"]&&!(flags&1)){if(!(path in FS.readFiles)){FS.readFiles[path]=1}}return stream},close(stream){if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if(stream.getdents)stream.getdents=null;try{if(stream.stream_ops.close){stream.stream_ops.close(stream)}}catch(e){throw e}finally{FS.closeStream(stream.fd)}stream.fd=null},isClosed(stream){return stream.fd===null},llseek(stream,offset,whence){if(FS.isClosed(stream)){throw new 
FS.ErrnoError(8)}if(!stream.seekable||!stream.stream_ops.llseek){throw new FS.ErrnoError(70)}if(whence!=0&&whence!=1&&whence!=2){throw new FS.ErrnoError(28)}stream.position=stream.stream_ops.llseek(stream,offset,whence);stream.ungotten=[];return stream.position},read(stream,buffer,offset,length,position){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.read){throw new FS.ErrnoError(28)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesRead=stream.stream_ops.read(stream,buffer,offset,length,position);if(!seeking)stream.position+=bytesRead;return bytesRead},write(stream,buffer,offset,length,position,canOwn){if(length<0||position<0){throw new FS.ErrnoError(28)}if(FS.isClosed(stream)){throw new FS.ErrnoError(8)}if((stream.flags&2097155)===0){throw new FS.ErrnoError(8)}if(FS.isDir(stream.node.mode)){throw new FS.ErrnoError(31)}if(!stream.stream_ops.write){throw new FS.ErrnoError(28)}if(stream.seekable&&stream.flags&1024){FS.llseek(stream,0,2)}var seeking=typeof position!="undefined";if(!seeking){position=stream.position}else if(!stream.seekable){throw new FS.ErrnoError(70)}var bytesWritten=stream.stream_ops.write(stream,buffer,offset,length,position,canOwn);if(!seeking)stream.position+=bytesWritten;return bytesWritten},mmap(stream,length,position,prot,flags){if((prot&2)!==0&&(flags&2)===0&&(stream.flags&2097155)!==2){throw new FS.ErrnoError(2)}if((stream.flags&2097155)===1){throw new FS.ErrnoError(2)}if(!stream.stream_ops.mmap){throw new FS.ErrnoError(43)}if(!length){throw new FS.ErrnoError(28)}return stream.stream_ops.mmap(stream,length,position,prot,flags)},msync(stream,buffer,offset,length,mmapFlags){if(!stream.stream_ops.msync){return 0}return 
stream.stream_ops.msync(stream,buffer,offset,length,mmapFlags)},ioctl(stream,cmd,arg){if(!stream.stream_ops.ioctl){throw new FS.ErrnoError(59)}return stream.stream_ops.ioctl(stream,cmd,arg)},readFile(path,opts={}){opts.flags=opts.flags||0;opts.encoding=opts.encoding||"binary";if(opts.encoding!=="utf8"&&opts.encoding!=="binary"){abort(`Invalid encoding type "${opts.encoding}"`)}var stream=FS.open(path,opts.flags);var stat=FS.stat(path);var length=stat.size;var buf=new Uint8Array(length);FS.read(stream,buf,0,length,0);if(opts.encoding==="utf8"){buf=UTF8ArrayToString(buf)}FS.close(stream);return buf},writeFile(path,data,opts={}){opts.flags=opts.flags||577;var stream=FS.open(path,opts.flags,opts.mode);if(typeof data=="string"){data=new Uint8Array(intArrayFromString(data,true))}if(ArrayBuffer.isView(data)){FS.write(stream,data,0,data.byteLength,undefined,opts.canOwn)}else{abort("Unsupported data type")}FS.close(stream)},cwd:()=>FS.currentPath,chdir(path){var lookup=FS.lookupPath(path,{follow:true});if(lookup.node===null){throw new FS.ErrnoError(44)}if(!FS.isDir(lookup.node.mode)){throw new FS.ErrnoError(54)}var errCode=FS.nodePermissions(lookup.node,"x");if(errCode){throw new FS.ErrnoError(errCode)}FS.currentPath=lookup.path},createDefaultDirectories(){FS.mkdir("/tmp");FS.mkdir("/home");FS.mkdir("/home/web_user")},createDefaultDevices(){FS.mkdir("/dev");FS.registerDevice(FS.makedev(1,3),{read:()=>0,write:(stream,buffer,offset,length,pos)=>length,llseek:()=>0});FS.mkdev("/dev/null",FS.makedev(1,3));TTY.register(FS.makedev(5,0),TTY.default_tty_ops);TTY.register(FS.makedev(6,0),TTY.default_tty1_ops);FS.mkdev("/dev/tty",FS.makedev(5,0));FS.mkdev("/dev/tty1",FS.makedev(6,0));var randomBuffer=new Uint8Array(1024),randomLeft=0;var randomByte=()=>{if(randomLeft===0){randomFill(randomBuffer);randomLeft=randomBuffer.byteLength}return 
randomBuffer[--randomLeft]};FS.createDevice("/dev","random",randomByte);FS.createDevice("/dev","urandom",randomByte);FS.mkdir("/dev/shm");FS.mkdir("/dev/shm/tmp")},createSpecialDirectories(){FS.mkdir("/proc");var proc_self=FS.mkdir("/proc/self");FS.mkdir("/proc/self/fd");FS.mount({mount(){var node=FS.createNode(proc_self,"fd",16895,73);node.stream_ops={llseek:MEMFS.stream_ops.llseek};node.node_ops={lookup(parent,name){var fd=+name;var stream=FS.getStreamChecked(fd);var ret={parent:null,mount:{mountpoint:"fake"},node_ops:{readlink:()=>stream.path},id:fd+1};ret.parent=ret;return ret},readdir(){return Array.from(FS.streams.entries()).filter(([k,v])=>v).map(([k,v])=>k.toString())}};return node}},{},"/proc/self/fd")},createStandardStreams(input,output,error){if(input){FS.createDevice("/dev","stdin",input)}else{FS.symlink("/dev/tty","/dev/stdin")}if(output){FS.createDevice("/dev","stdout",null,output)}else{FS.symlink("/dev/tty","/dev/stdout")}if(error){FS.createDevice("/dev","stderr",null,error)}else{FS.symlink("/dev/tty1","/dev/stderr")}var stdin=FS.open("/dev/stdin",0);var stdout=FS.open("/dev/stdout",1);var stderr=FS.open("/dev/stderr",1)},staticInit(){FS.nameTable=new Array(4096);FS.mount(MEMFS,{},"/");FS.createDefaultDirectories();FS.createDefaultDevices();FS.createSpecialDirectories();FS.filesystems={MEMFS}},init(input,output,error){FS.initialized=true;input??=Module["stdin"];output??=Module["stdout"];error??=Module["stderr"];FS.createStandardStreams(input,output,error)},quit(){FS.initialized=false;for(var stream of FS.streams){if(stream){FS.close(stream)}}},findObject(path,dontResolveLastLink){var ret=FS.analyzePath(path,dontResolveLastLink);if(!ret.exists){return null}return ret.object},analyzePath(path,dontResolveLastLink){try{var lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});path=lookup.path}catch(e){}var ret={isRoot:false,exists:false,error:0,name:null,path:null,object:null,parentExists:false,parentPath:null,parentObject:null};try{var 
lookup=FS.lookupPath(path,{parent:true});ret.parentExists=true;ret.parentPath=lookup.path;ret.parentObject=lookup.node;ret.name=PATH.basename(path);lookup=FS.lookupPath(path,{follow:!dontResolveLastLink});ret.exists=true;ret.path=lookup.path;ret.object=lookup.node;ret.name=lookup.node.name;ret.isRoot=lookup.path==="/"}catch(e){ret.error=e.errno}return ret},createPath(parent,path,canRead,canWrite){parent=typeof parent=="string"?parent:FS.getPath(parent);var parts=path.split("/").reverse();while(parts.length){var part=parts.pop();if(!part)continue;var current=PATH.join2(parent,part);try{FS.mkdir(current)}catch(e){if(e.errno!=20)throw e}parent=current}return current},createFile(parent,name,properties,canRead,canWrite){var path=PATH.join2(typeof parent=="string"?parent:FS.getPath(parent),name);var mode=FS_getMode(canRead,canWrite);return FS.create(path,mode)},createDataFile(parent,name,data,canRead,canWrite,canOwn){var path=name;if(parent){parent=typeof parent=="string"?parent:FS.getPath(parent);path=name?PATH.join2(parent,name):parent}var mode=FS_getMode(canRead,canWrite);var node=FS.create(path,mode);if(data){if(typeof data=="string"){var arr=new Array(data.length);for(var i=0,len=data.length;ithis.length-1||idx<0){return undefined}var chunkOffset=idx%this.chunkSize;var chunkNum=idx/this.chunkSize|0;return this.getter(chunkNum)[chunkOffset]}setDataGetter(getter){this.getter=getter}cacheLength(){var xhr=new XMLHttpRequest;xhr.open("HEAD",url,false);xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". 
Status: "+xhr.status);var datalength=Number(xhr.getResponseHeader("Content-length"));var header;var hasByteServing=(header=xhr.getResponseHeader("Accept-Ranges"))&&header==="bytes";var usesGzip=(header=xhr.getResponseHeader("Content-Encoding"))&&header==="gzip";var chunkSize=1024*1024;if(!hasByteServing)chunkSize=datalength;var doXHR=(from,to)=>{if(from>to)abort("invalid range ("+from+", "+to+") or no bytes requested!");if(to>datalength-1)abort("only "+datalength+" bytes available! programmer error!");var xhr=new XMLHttpRequest;xhr.open("GET",url,false);if(datalength!==chunkSize)xhr.setRequestHeader("Range","bytes="+from+"-"+to);xhr.responseType="arraybuffer";if(xhr.overrideMimeType){xhr.overrideMimeType("text/plain; charset=x-user-defined")}xhr.send(null);if(!(xhr.status>=200&&xhr.status<300||xhr.status===304))abort("Couldn't load "+url+". Status: "+xhr.status);if(xhr.response!==undefined){return new Uint8Array(xhr.response||[])}return intArrayFromString(xhr.responseText||"",true)};var lazyArray=this;lazyArray.setDataGetter(chunkNum=>{var start=chunkNum*chunkSize;var end=(chunkNum+1)*chunkSize-1;end=Math.min(end,datalength-1);if(typeof lazyArray.chunks[chunkNum]=="undefined"){lazyArray.chunks[chunkNum]=doXHR(start,end)}if(typeof lazyArray.chunks[chunkNum]=="undefined")abort("doXHR failed!");return lazyArray.chunks[chunkNum]});if(usesGzip||!datalength){chunkSize=datalength=1;datalength=this.getter(0).length;chunkSize=datalength;out("LazyFiles on gzip forces download of the whole file when length is accessed")}this._length=datalength;this._chunkSize=chunkSize;this.lengthKnown=true}get length(){if(!this.lengthKnown){this.cacheLength()}return this._length}get chunkSize(){if(!this.lengthKnown){this.cacheLength()}return this._chunkSize}}if(globalThis.XMLHttpRequest){if(!ENVIRONMENT_IS_WORKER)abort("Cannot do synchronous binary XHRs outside webworkers in modern browsers. 
Use --embed-file or --preload-file in emcc");var lazyArray=new LazyUint8Array;var properties={isDevice:false,contents:lazyArray}}else{var properties={isDevice:false,url}}var node=FS.createFile(parent,name,properties,canRead,canWrite);if(properties.contents){node.contents=properties.contents}else if(properties.url){node.contents=null;node.url=properties.url}Object.defineProperties(node,{usedBytes:{get:function(){return this.contents.length}}});var stream_ops={};for(const[key,fn]of Object.entries(node.stream_ops)){stream_ops[key]=(...args)=>{FS.forceLoadFile(node);return fn(...args)}}function writeChunks(stream,buffer,offset,length,position){var contents=stream.node.contents;if(position>=contents.length)return 0;var size=Math.min(contents.length-position,length);if(contents.slice){for(var i=0;i{FS.forceLoadFile(node);return writeChunks(stream,buffer,offset,length,position)};stream_ops.mmap=(stream,length,position,prot,flags)=>{FS.forceLoadFile(node);var ptr=mmapAlloc(length);if(!ptr){throw new FS.ErrnoError(48)}writeChunks(stream,HEAP8,ptr,length,position);return{ptr,allocated:true}};node.stream_ops=stream_ops;return node}};var SYSCALLS={DEFAULT_POLLMASK:5,calculateAt(dirfd,path,allowEmpty){if(PATH.isAbs(path)){return path}var dir;if(dirfd===-100){dir=FS.cwd()}else{var dirstream=SYSCALLS.getStreamFromFD(dirfd);dir=dirstream.path}if(path.length==0){if(!allowEmpty){throw new FS.ErrnoError(44)}return dir}return dir+"/"+path},writeStat(buf,stat){HEAPU32[buf>>2]=stat.dev;HEAPU32[buf+4>>2]=stat.mode;HEAPU32[buf+8>>2]=stat.nlink;HEAPU32[buf+12>>2]=stat.uid;HEAPU32[buf+16>>2]=stat.gid;HEAPU32[buf+20>>2]=stat.rdev;HEAP64[buf+24>>3]=BigInt(stat.size);HEAP32[buf+32>>2]=4096;HEAP32[buf+36>>2]=stat.blocks;var atime=stat.atime.getTime();var mtime=stat.mtime.getTime();var 
ctime=stat.ctime.getTime();HEAP64[buf+40>>3]=BigInt(Math.floor(atime/1e3));HEAPU32[buf+48>>2]=atime%1e3*1e3*1e3;HEAP64[buf+56>>3]=BigInt(Math.floor(mtime/1e3));HEAPU32[buf+64>>2]=mtime%1e3*1e3*1e3;HEAP64[buf+72>>3]=BigInt(Math.floor(ctime/1e3));HEAPU32[buf+80>>2]=ctime%1e3*1e3*1e3;HEAP64[buf+88>>3]=BigInt(stat.ino);return 0},writeStatFs(buf,stats){HEAPU32[buf+4>>2]=stats.bsize;HEAPU32[buf+60>>2]=stats.bsize;HEAP64[buf+8>>3]=BigInt(stats.blocks);HEAP64[buf+16>>3]=BigInt(stats.bfree);HEAP64[buf+24>>3]=BigInt(stats.bavail);HEAP64[buf+32>>3]=BigInt(stats.files);HEAP64[buf+40>>3]=BigInt(stats.ffree);HEAPU32[buf+48>>2]=stats.fsid;HEAPU32[buf+64>>2]=stats.flags;HEAPU32[buf+56>>2]=stats.namelen},doMsync(addr,stream,len,flags,offset){if(!FS.isFile(stream.node.mode)){throw new FS.ErrnoError(43)}if(flags&2){return 0}var buffer=HEAPU8.slice(addr,addr+len);FS.msync(stream,buffer,offset,len,flags)},getStreamFromFD(fd){var stream=FS.getStreamChecked(fd);return stream},varargs:undefined,getStr(ptr){var ret=UTF8ToString(ptr);return ret}};function ___syscall_fcntl64(fd,cmd,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(cmd){case 0:{var arg=syscallGetVarargI();if(arg<0){return-28}while(FS.streams[arg]){arg++}var newStream;newStream=FS.dupStream(stream,arg);return newStream.fd}case 1:case 2:return 0;case 3:return stream.flags;case 4:{var arg=syscallGetVarargI();stream.flags|=arg;return 0}case 12:{var arg=syscallGetVarargP();var offset=0;HEAP16[arg+offset>>1]=2;return 0}case 13:case 14:return 0}return-28}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_fstat64(fd,buf){try{return SYSCALLS.writeStat(buf,FS.fstat(fd))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_ioctl(fd,op,varargs){SYSCALLS.varargs=varargs;try{var stream=SYSCALLS.getStreamFromFD(fd);switch(op){case 21509:{if(!stream.tty)return-59;return 0}case 
21505:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcgets){var termios=stream.tty.ops.ioctl_tcgets(stream);var argp=syscallGetVarargP();HEAP32[argp>>2]=termios.c_iflag||0;HEAP32[argp+4>>2]=termios.c_oflag||0;HEAP32[argp+8>>2]=termios.c_cflag||0;HEAP32[argp+12>>2]=termios.c_lflag||0;for(var i=0;i<32;i++){HEAP8[argp+i+17]=termios.c_cc[i]||0}return 0}return 0}case 21510:case 21511:case 21512:{if(!stream.tty)return-59;return 0}case 21506:case 21507:case 21508:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tcsets){var argp=syscallGetVarargP();var c_iflag=HEAP32[argp>>2];var c_oflag=HEAP32[argp+4>>2];var c_cflag=HEAP32[argp+8>>2];var c_lflag=HEAP32[argp+12>>2];var c_cc=[];for(var i=0;i<32;i++){c_cc.push(HEAP8[argp+i+17])}return stream.tty.ops.ioctl_tcsets(stream.tty,op,{c_iflag,c_oflag,c_cflag,c_lflag,c_cc})}return 0}case 21519:{if(!stream.tty)return-59;var argp=syscallGetVarargP();HEAP32[argp>>2]=0;return 0}case 21520:{if(!stream.tty)return-59;return-28}case 21537:case 21531:{var argp=syscallGetVarargP();return FS.ioctl(stream,op,argp)}case 21523:{if(!stream.tty)return-59;if(stream.tty.ops.ioctl_tiocgwinsz){var winsize=stream.tty.ops.ioctl_tiocgwinsz(stream.tty);var argp=syscallGetVarargP();HEAP16[argp>>1]=winsize[0];HEAP16[argp+2>>1]=winsize[1]}return 0}case 21524:{if(!stream.tty)return-59;return 0}case 21515:{if(!stream.tty)return-59;return 0}default:return-28}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_lstat64(path,buf){try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.lstat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_newfstatat(dirfd,path,buf,flags){try{path=SYSCALLS.getStr(path);var nofollow=flags&256;var allowEmpty=flags&4096;flags=flags&~6400;path=SYSCALLS.calculateAt(dirfd,path,allowEmpty);return SYSCALLS.writeStat(buf,nofollow?FS.lstat(path):FS.stat(path))}catch(e){if(typeof 
FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_openat(dirfd,path,flags,varargs){SYSCALLS.varargs=varargs;try{path=SYSCALLS.getStr(path);path=SYSCALLS.calculateAt(dirfd,path);var mode=varargs?syscallGetVarargI():0;return FS.open(path,flags,mode).fd}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}function ___syscall_stat64(path,buf){try{path=SYSCALLS.getStr(path);return SYSCALLS.writeStat(buf,FS.stat(path))}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var __abort_js=()=>abort("");var INT53_MAX=9007199254740992;var INT53_MIN=-9007199254740992;var bigintToI53Checked=num=>numINT53_MAX?NaN:Number(num);function __munmap_js(addr,len,prot,flags,fd,offset){offset=bigintToI53Checked(offset);try{var stream=SYSCALLS.getStreamFromFD(fd);if(prot&2){SYSCALLS.doMsync(addr,stream,len,flags,offset)}}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return-e.errno}}var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite);var __tzset_js=(timezone,daylight,std_name,dst_name)=>{var currentYear=(new Date).getFullYear();var winter=new Date(currentYear,0,1);var summer=new Date(currentYear,6,1);var winterOffset=winter.getTimezoneOffset();var summerOffset=summer.getTimezoneOffset();var stdTimezoneOffset=Math.max(winterOffset,summerOffset);HEAPU32[timezone>>2]=stdTimezoneOffset*60;HEAP32[daylight>>2]=Number(winterOffset!=summerOffset);var extractZone=timezoneOffset=>{var sign=timezoneOffset>=0?"-":"+";var absOffset=Math.abs(timezoneOffset);var hours=String(Math.floor(absOffset/60)).padStart(2,"0");var minutes=String(absOffset%60).padStart(2,"0");return`UTC${sign}${hours}${minutes}`};var winterName=extractZone(winterOffset);var summerName=extractZone(summerOffset);if(summerOffset2147483648;var growMemory=size=>{var oldHeapSize=wasmMemory.buffer.byteLength;var 
pages=(size-oldHeapSize+65535)/65536|0;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};var _emscripten_resize_heap=requestedSize=>{var oldSize=HEAPU8.length;requestedSize>>>=0;var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return false}for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignMemory(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false};var _emscripten_throw_string=str=>{throw UTF8ToString(str)};var ENV={};var getExecutableName=()=>thisProgram||"./this.program";var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(globalThis.navigator?.language??"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};var _environ_get=(__environ,environ_buf)=>{var bufSize=0;var envp=0;for(var string of getEnvStrings()){var ptr=environ_buf+bufSize;HEAPU32[__environ+envp>>2]=ptr;bufSize+=stringToUTF8(string,ptr,Infinity)+1;envp+=4}return 0};var _environ_sizes_get=(penviron_count,penviron_buf_size)=>{var strings=getEnvStrings();HEAPU32[penviron_count>>2]=strings.length;var bufSize=0;for(var string of strings){bufSize+=lengthBytesUTF8(string)+1}HEAPU32[penviron_buf_size>>2]=bufSize;return 0};function _fd_close(fd){try{var stream=SYSCALLS.getStreamFromFD(fd);FS.close(stream);return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doReadv=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;var 
curr=FS.read(stream,HEAP8,ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}function _fd_seek(fd,offset,whence,newOffset){offset=bigintToI53Checked(offset);try{if(isNaN(offset))return 61;var stream=SYSCALLS.getStreamFromFD(fd);FS.llseek(stream,offset,whence);HEAP64[newOffset>>3]=BigInt(stream.position);if(stream.getdents&&offset===0&&whence===0)stream.getdents=null;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var doWritev=(stream,iov,iovcnt,offset)=>{var ret=0;for(var i=0;i>2];var len=HEAPU32[iov+4>>2];iov+=8;var curr=FS.write(stream,HEAP8,ptr,len,offset);if(curr<0)return-1;ret+=curr;if(curr>2]=num;return 0}catch(e){if(typeof FS=="undefined"||!(e.name==="ErrnoError"))throw e;return e.errno}}var _llvm_eh_typeid_for=type=>type;var wasmTableMirror=[];var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var getCFunc=ident=>{var func=Module["_"+ident];return func};var writeArrayToMemory=(array,buffer)=>{HEAP8.set(array,buffer)};var stackAlloc=sz=>__emscripten_stack_alloc(sz);var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={string:str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},array:arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return UTF8ToString(ret)}if(returnType==="boolean")return Boolean(ret);return ret}var func=getCFunc(ident);var cArgs=[];var stack=0;if(args){for(var i=0;i{var numericArgs=!argTypes||argTypes.every(type=>type==="number"||type==="boolean");var numericRet=returnType!=="string";if(numericRet&&numericArgs&&!opts){return 
getCFunc(ident)}return(...args)=>ccall(ident,returnType,argTypes,args,opts)};var FS_createPath=(...args)=>FS.createPath(...args);var FS_unlink=(...args)=>FS.unlink(...args);var FS_createLazyFile=(...args)=>FS.createLazyFile(...args);var FS_createDevice=(...args)=>FS.createDevice(...args);FS.createPreloadedFile=FS_createPreloadedFile;FS.preloadFile=FS_preloadFile;FS.staticInit();{if(Module["noExitRuntime"])noExitRuntime=Module["noExitRuntime"];if(Module["preloadPlugins"])preloadPlugins=Module["preloadPlugins"];if(Module["print"])out=Module["print"];if(Module["printErr"])err=Module["printErr"];if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].shift()()}}}Module["addRunDependency"]=addRunDependency;Module["removeRunDependency"]=removeRunDependency;Module["ccall"]=ccall;Module["cwrap"]=cwrap;Module["FS_preloadFile"]=FS_preloadFile;Module["FS_unlink"]=FS_unlink;Module["FS_createPath"]=FS_createPath;Module["FS_createDevice"]=FS_createDevice;Module["FS"]=FS;Module["FS_createDataFile"]=FS_createDataFile;Module["FS_createLazyFile"]=FS_createLazyFile;var ___cxa_free_exception,_opencc_create,_opencc_convert,_opencc_destroy,_emscripten_builtin_memalign,_setThrew,__emscripten_tempret_set,__emscripten_stack_restore,__emscripten_stack_alloc,_emscripten_stack_get_current,___cxa_decrement_exception_refcount,___cxa_increment_exception_refcount,___cxa_can_catch,___cxa_get_exception_ptr,memory,__indirect_function_table,wasmMemory,wasmTable;function 
assignWasmExports(wasmExports){___cxa_free_exception=wasmExports["__cxa_free_exception"];_opencc_create=Module["_opencc_create"]=wasmExports["opencc_create"];_opencc_convert=Module["_opencc_convert"]=wasmExports["opencc_convert"];_opencc_destroy=Module["_opencc_destroy"]=wasmExports["opencc_destroy"];_emscripten_builtin_memalign=wasmExports["emscripten_builtin_memalign"];_setThrew=wasmExports["setThrew"];__emscripten_tempret_set=wasmExports["_emscripten_tempret_set"];__emscripten_stack_restore=wasmExports["_emscripten_stack_restore"];__emscripten_stack_alloc=wasmExports["_emscripten_stack_alloc"];_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"];___cxa_decrement_exception_refcount=wasmExports["__cxa_decrement_exception_refcount"];___cxa_increment_exception_refcount=wasmExports["__cxa_increment_exception_refcount"];___cxa_can_catch=wasmExports["__cxa_can_catch"];___cxa_get_exception_ptr=wasmExports["__cxa_get_exception_ptr"];memory=wasmMemory=wasmExports["memory"];__indirect_function_table=wasmTable=wasmExports["__indirect_function_table"]}var 
wasmImports={__assert_fail:___assert_fail,__cxa_begin_catch:___cxa_begin_catch,__cxa_end_catch:___cxa_end_catch,__cxa_find_matching_catch_2:___cxa_find_matching_catch_2,__cxa_find_matching_catch_3:___cxa_find_matching_catch_3,__cxa_rethrow:___cxa_rethrow,__cxa_throw:___cxa_throw,__cxa_uncaught_exceptions:___cxa_uncaught_exceptions,__resumeException:___resumeException,__syscall_fcntl64:___syscall_fcntl64,__syscall_fstat64:___syscall_fstat64,__syscall_ioctl:___syscall_ioctl,__syscall_lstat64:___syscall_lstat64,__syscall_newfstatat:___syscall_newfstatat,__syscall_openat:___syscall_openat,__syscall_stat64:___syscall_stat64,_abort_js:__abort_js,_munmap_js:__munmap_js,_tzset_js:__tzset_js,emscripten_resize_heap:_emscripten_resize_heap,emscripten_throw_string:_emscripten_throw_string,environ_get:_environ_get,environ_sizes_get:_environ_sizes_get,fd_close:_fd_close,fd_read:_fd_read,fd_seek:_fd_seek,fd_write:_fd_write,invoke_diii,invoke_fiii,invoke_i,invoke_ii,invoke_iii,invoke_iiii,invoke_iiiii,invoke_iiiiii,invoke_iiiiiii,invoke_iiiiiiii,invoke_iiiiiiiiiii,invoke_iiiiiiiiiiii,invoke_iiiiiiiiiiiii,invoke_jiiii,invoke_v,invoke_vi,invoke_vii,invoke_viii,invoke_viiii,invoke_viiiii,invoke_viiiiii,invoke_viiiiiii,invoke_viiiiiiiiii,invoke_viiiiiiiiiiiiiii,llvm_eh_typeid_for:_llvm_eh_typeid_for};function invoke_viiiiiii(index,a1,a2,a3,a4,a5,a6,a7){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_ii(index,a1){var sp=stackSave();try{return getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiii(index,a1,a2,a3,a4){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vii(index,a1,a2){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iii(index,a1,a2){var 
sp=stackSave();try{return getWasmTableEntry(index)(a1,a2)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viii(index,a1,a2,a3){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_v(index){var sp=stackSave();try{getWasmTableEntry(index)()}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiii(index,a1,a2,a3){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiii(index,a1,a2,a3,a4,a5){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiii(index,a1,a2,a3,a4){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_vi(index,a1){var sp=stackSave();try{getWasmTableEntry(index)(a1)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiii(index,a1,a2,a3,a4,a5,a6){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiiii(index,a1,a2,a3,a4,a5,a6){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiii(index,a1,a2,a3,a4,a5){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiiii(index,a1,a2,a3,a4,a5,a6,a7){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)}catch(e){stackRestore(sp);if(e!==e+0)throw 
e;_setThrew(1,0)}}function invoke_jiiii(index,a1,a2,a3,a4){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0);return 0n}}function invoke_iiiiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_fiii(index,a1,a2,a3){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_diii(index,a1,a2,a3){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_i(index){var sp=stackSave();try{return getWasmTableEntry(index)()}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_iiiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11){var sp=stackSave();try{return getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function invoke_viiiiiiiiiiiiiii(index,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15){var sp=stackSave();try{getWasmTableEntry(index)(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15)}catch(e){stackRestore(sp);if(e!==e+0)throw e;_setThrew(1,0)}}function run(){if(runDependencies>0){dependenciesFulfilled=run;return}preRun();if(runDependencies>0){dependenciesFulfilled=run;return}function 
doRun(){Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve?.(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(()=>{setTimeout(()=>Module["setStatus"](""),1);doRun()},1)}else{doRun()}}var wasmExports;wasmExports=await (createWasm());run();if(runtimeInitialized){moduleRtn=Module}else{moduleRtn=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject})} +;return moduleRtn}export default createOpenCCWasm; diff --git a/wasm-lib/dist/opencc-wasm.esm.wasm b/wasm-lib/dist/opencc-wasm.esm.wasm new file mode 100755 index 000000000..359d21bac Binary files /dev/null and b/wasm-lib/dist/opencc-wasm.esm.wasm differ diff --git a/wasm-lib/dist/opencc-wasm.wasm b/wasm-lib/dist/opencc-wasm.wasm new file mode 100755 index 000000000..359d21bac Binary files /dev/null and b/wasm-lib/dist/opencc-wasm.wasm differ diff --git a/wasm-lib/index.d.ts b/wasm-lib/index.d.ts new file mode 100644 index 000000000..ed68d18aa --- /dev/null +++ b/wasm-lib/index.d.ts @@ -0,0 +1,130 @@ +/** + * OpenCC WASM - WebAssembly backend for OpenCC + * + * @packageDocumentation + */ + +/** + * Options for creating a converter + */ +export interface ConverterOptions { + /** + * Source locale: 'cn' (Simplified Chinese), 'tw' (Traditional Taiwan), + * 'hk' (Traditional Hong Kong), 't' (Traditional), 'jp' (Japanese) + */ + from?: string; + + /** + * Target locale: 'cn' (Simplified Chinese), 'tw' (Traditional Taiwan), + * 'hk' (Traditional Hong Kong), 't' (Traditional), 'jp' (Japanese) + */ + to?: string; + + /** + * Config file name (e.g., 's2t.json', 't2s.json') + * If specified, 'from' and 'to' will be ignored + */ + config?: string; +} + +/** + * Async converter function that transforms text + */ +export type ConverterFunction = (text: string) => Promise; + +/** + * Synchronous custom converter function (for custom dictionaries) + */ +export type CustomConverterFunction = (text: string) => 
string; + +/** + * Custom dictionary entry: [source, target] + */ +export type DictEntry = [string, string]; + +/** + * Custom dictionary: array of entries or pipe-separated string + */ +export type CustomDict = DictEntry[] | string; + +/** + * Locale mappings + */ +export interface LocaleMap { + cn: string; + tw: string; + hk: string; + jp: string; + t: string; +} + +/** + * OpenCC namespace with all conversion functions + */ +export interface OpenCCNamespace { + /** + * Create a converter with the given options + * + * @example + * ```typescript + * const converter = OpenCC.Converter({ from: 'cn', to: 'tw' }); + * const result = await converter('简体中文'); + * ``` + */ + Converter(opts: ConverterOptions): ConverterFunction; + + /** + * Create a custom converter with user-defined dictionary + * + * @param dict - Array of [source, target] pairs or pipe-separated string + * + * @example + * ```typescript + * const custom = OpenCC.CustomConverter([ + * ['"', '「'], + * ['"', '」'], + * ]); + * const result = custom('He said "hello"'); + * ``` + */ + CustomConverter(dict: CustomDict): CustomConverterFunction; + + /** + * Create a converter with additional custom dictionaries + * + * @param fromLocale - Source locale + * @param toLocale - Target locale + * @param extraDicts - Additional custom dictionaries to apply after conversion + * + * @example + * ```typescript + * const converter = OpenCC.ConverterFactory('cn', 'tw', [ + * [['"', '「'], ['"', '」']] + * ]); + * const result = await converter('简体中文 "test"'); + * ``` + */ + ConverterFactory( + fromLocale: string, + toLocale: string, + extraDicts?: CustomDict[] + ): ConverterFunction; + + /** + * Locale constants for 'from' and 'to' options + */ + Locale: { + from: LocaleMap; + to: LocaleMap; + }; +} + +/** + * OpenCC main export + */ +export const OpenCC: OpenCCNamespace; + +/** + * Default export + */ +export default OpenCC; diff --git a/wasm-lib/index.js b/wasm-lib/index.js new file mode 100644 index 
000000000..44f996725 --- /dev/null +++ b/wasm-lib/index.js @@ -0,0 +1,219 @@ +// Lightweight OpenCC WASM wrapper with opencc-js compatible API. +// 假定包内目录结构: +// - dist/opencc-wasm.js|.wasm +// - data/config/*.json +// - data/dict/*.ocd2 + +// Optional Node helpers (guarded for browser) +let fsMod = null; +let fileURLToPathFn = null; +const hasNode = typeof process !== "undefined" && !!process.versions?.node; +if (hasNode) { + fsMod = await import("node:fs"); + ({ fileURLToPath: fileURLToPathFn } = await import("node:url")); +} + +const BASE_URL = new URL("./", import.meta.url); + +const readFileText = (url) => { + if (!fsMod || !fileURLToPathFn) throw new Error("fs not available in this environment"); + const path = fileURLToPathFn(url); + return fsMod.readFileSync(path, "utf-8"); +}; + +const readFileBuffer = (url) => { + if (!fsMod || !fileURLToPathFn) throw new Error("fs not available in this environment"); + const path = fileURLToPathFn(url); + return fsMod.readFileSync(path); +}; + +// 预设映射:from -> to -> config 文件名 +const CONFIG_MAP = { + cn: { t: "s2t.json", tw: "s2tw.json", hk: "s2hk.json", cn: null }, + tw: { cn: "tw2s.json", t: "tw2t.json", tw: null }, + hk: { cn: "hk2s.json", t: "hk2t.json", hk: null }, + t: { cn: "t2s.json", tw: "t2tw.json", hk: "t2hk.json", jp: "t2jp.json", t: null }, + jp: { t: "jp2t.json" }, +}; + +// 缓存已加载的配置/字典与打开的句柄,避免重复加载和重复构建 +const loadedConfigs = new Set(); +const loadedDicts = new Set(); +const handles = new Map(); +let modulePromise = null; +let api = null; + +async function getModule() { + if (modulePromise) return modulePromise; + + // 1) 先确定包根目录(一定要以 / 结尾) + const pkgBase = new URL("./", import.meta.url); + // 如果这段代码在 HTML inline script 里,没有 import.meta.url,那就用绝对路径: + // const pkgBase = new URL("/vendor/opencc-wasm/", window.location.origin); + + // 2) import glue (from build/ for testing/development) + const glueUrl = new URL("build/opencc-wasm.esm.js", pkgBase); + + const { default: create } = await 
import(glueUrl.href); + + // 3) locateFile 必须相对 pkgBase,而不是 glueUrl + modulePromise = create({ + locateFile: (p) => new URL(`build/${p}`, pkgBase).href + }); + + return modulePromise; +} + +async function getApi() { + const mod = await getModule(); + if (!api) { + api = { + create: mod.cwrap("opencc_create", "number", ["string"]), + convert: mod.cwrap("opencc_convert", "string", ["number", "string"]), + destroy: mod.cwrap("opencc_destroy", null, ["number"]), + }; + } + return { mod, api }; +} + +function collectOcd2Files(node, acc) { + if (!node || typeof node !== "object") return; + if (node.type === "ocd2" && node.file) acc.add(node.file); + if (node.type === "group" && Array.isArray(node.dicts)) { + node.dicts.forEach((d) => collectOcd2Files(d, acc)); + } +} + +async function fetchText(url) { + if (url.protocol === "file:") { + return readFileText(url); + } + const resp = await fetch(url.href); + if (!resp.ok) throw new Error(`Fetch ${url} failed: ${resp.status}`); + return resp.text(); +} + +async function fetchBuffer(url) { + if (url.protocol === "file:") { + return new Uint8Array(readFileBuffer(url)); + } + const resp = await fetch(url.href); + if (!resp.ok) throw new Error(`Fetch ${url} failed: ${resp.status}`); + return new Uint8Array(await resp.arrayBuffer()); +} + +async function ensureConfig(configName) { + if (handles.has(configName)) return handles.get(configName); + const { mod, api: apiFns } = await getApi(); + mod.FS.mkdirTree("/data/config"); + mod.FS.mkdirTree("/data/dict"); + const cfgUrl = new URL(`./data/config/${configName}`, BASE_URL); + const cfgJson = JSON.parse(await fetchText(cfgUrl)); + + const dicts = new Set(); + collectOcd2Files(cfgJson.segmentation?.dict, dicts); + if (Array.isArray(cfgJson.conversion_chain)) { + cfgJson.conversion_chain.forEach((item) => collectOcd2Files(item?.dict, dicts)); + } + for (const file of dicts) { + if (loadedDicts.has(file)) continue; // 避免重复加载同一字典 + const dictUrl = new URL(`./data/dict/${file}`, 
BASE_URL); + const buf = await fetchBuffer(dictUrl); + mod.FS.writeFile(`/data/dict/${file}`, buf); + loadedDicts.add(file); + } + // 重写配置中的 ocd2 路径到 /data/dict 下 + const patchPaths = (node) => { + if (!node || typeof node !== "object") return; + if (node.type === "ocd2" && node.file) { + node.file = `/data/dict/${node.file}`; + } + if (node.type === "group" && Array.isArray(node.dicts)) { + node.dicts.forEach(patchPaths); + } + }; + patchPaths(cfgJson.segmentation?.dict); + if (Array.isArray(cfgJson.conversion_chain)) { + cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict)); + } + mod.FS.writeFile(`/data/config/${configName}`, JSON.stringify(cfgJson)); + loadedConfigs.add(configName); + + const handle = apiFns.create(`/data/config/${configName}`); + if (!handle || handle < 0) { + throw new Error(`opencc_create failed for ${configName}`); + } + handles.set(configName, handle); + return handle; +} + +function resolveConfig(from, to) { + const f = (from || "").toLowerCase(); + const t = (to || "").toLowerCase(); + const m = CONFIG_MAP[f]; + if (!m || !(t in m)) { + throw new Error(`Unsupported conversion from '${from}' to '${to}'`); + } + return m[t]; // may be null for identical locale (no-op) +} + +function createConverter({ from, to, config }) { + const configName = config || resolveConfig(from, to); + return async (text) => { + if (configName === null) return text; // no-op + const handle = await ensureConfig(configName); + const { api: apiFns } = await getApi(); + return apiFns.convert(handle, text); + }; +} + +function CustomConverter(dictOrString) { + let pairs = []; + if (typeof dictOrString === "string") { + pairs = dictOrString + .split("|") + .map((seg) => seg.trim()) + .filter(Boolean) + .map((seg) => seg.split(/\s+/)) + .filter((arr) => arr.length >= 2) + .map(([a, b]) => [a, b]); + } else if (Array.isArray(dictOrString)) { + pairs = dictOrString; + } + // 按键长度降序,保证长词优先 + pairs.sort((a, b) => b[0].length - a[0].length); + return (text) => { 
+ let out = text; + for (const [src, dst] of pairs) { + out = out.split(src).join(dst); + } + return out; + }; +} + +function ConverterFactory(fromLocale, toLocale, extraDicts = []) { + const conv = createConverter({ from: fromLocale, to: toLocale }); + const extras = extraDicts.map((d) => CustomConverter(d)); + return async (text) => { + let result = await conv(text); + extras.forEach((fn) => { + result = fn(result); + }); + return result; + }; +} + +export const OpenCC = { + Converter(opts) { + const fn = createConverter(opts); + return (text) => fn(text); + }, + CustomConverter, + ConverterFactory, + Locale: { + from: { cn: "cn", tw: "t", hk: "hk", jp: "jp", t: "t" }, + to: { cn: "cn", tw: "tw", hk: "hk", jp: "jp", t: "t" }, + }, +}; + +export default OpenCC; diff --git a/wasm-lib/package-lock.json b/wasm-lib/package-lock.json new file mode 100644 index 000000000..57c3bfaf3 --- /dev/null +++ b/wasm-lib/package-lock.json @@ -0,0 +1,13 @@ +{ + "name": "opencc-wasm", + "version": "0.2.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "opencc-wasm", + "version": "0.2.1", + "license": "Apache-2.0" + } + } +} diff --git a/wasm-lib/package.json b/wasm-lib/package.json new file mode 100644 index 000000000..7dda5c390 --- /dev/null +++ b/wasm-lib/package.json @@ -0,0 +1,54 @@ +{ + "name": "opencc-wasm", + "version": "0.2.1", + "description": "WebAssembly backend for OpenCC with opencc-js compatible API and official configs/ocd2 dictionaries.", + "keywords": [ + "opencc", + "wasm", + "chinese", + "simplified", + "traditional", + "converter" + ], + "type": "module", + "main": "./dist/cjs/index.cjs", + "module": "./dist/esm/index.js", + "types": "./index.d.ts", + "exports": { + ".": { + "types": "./index.d.ts", + "import": "./dist/esm/index.js", + "require": "./dist/cjs/index.cjs" + }, + "./wasm": { + "import": "./dist/esm/opencc-wasm.js", + "require": "./dist/cjs/opencc-wasm.cjs" + }, + "./dist/*": "./dist/*" + }, + "files": [ + "dist/", + 
"index.d.ts", + "README.md", + "LICENSE", + "NOTICE" + ], + "scripts": { + "build": "./build.sh && node scripts/build-api.js", + "test": "node --test test/opencc.test.js" + }, + "directories": { + "test": "test" + }, + "author": "Frank Lin (based on OpenCC)", + "contributors": [ + "OpenCC Authors" + ], + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/frankslin/OpenCC.git", + "directory": "wasm-lib" + }, + "homepage": "https://github.com/frankslin/OpenCC/tree/master/wasm-lib" +} diff --git a/wasm-lib/scripts/build-api.js b/wasm-lib/scripts/build-api.js new file mode 100644 index 000000000..28c9c9bb9 --- /dev/null +++ b/wasm-lib/scripts/build-api.js @@ -0,0 +1,231 @@ +#!/usr/bin/env node +/** + * Build publishable dist/ from intermediate build/ artifacts. + * Assumes `build.sh` has produced build/opencc-wasm.{esm.js,cjs,wasm} + * and data/ contains config/ + dict/ to be copied into dist/data/. + */ + +import fs from "node:fs"; +import path from "node:path"; +import url from "node:url"; + +const __dirname = path.dirname(url.fileURLToPath(import.meta.url)); +const root = path.resolve(__dirname, ".."); +const build = path.join(root, "build"); +const dist = path.join(root, "dist"); +const distEsm = path.join(dist, "esm"); +const distCjs = path.join(dist, "cjs"); + +fs.mkdirSync(distEsm, { recursive: true }); +fs.mkdirSync(distCjs, { recursive: true }); + +// Copy WASM glue from build/ to dist/ +let esmGlue = fs.readFileSync(path.join(build, "opencc-wasm.esm.js"), "utf-8"); +// Keep the .esm.wasm reference; package both wasm filenames for compatibility. 
/**
 * Replace the first occurrence of `anchor` in `source` with `replacement`.
 *
 * The build rewrites index.js by matching exact source lines. If index.js is
 * edited and an anchor no longer matches, a plain String.replace would do
 * nothing and the published dist/esm/index.js would still point at build/.
 * Failing the build here makes that drift visible immediately.
 *
 * @param {string} source - Text to patch.
 * @param {string} anchor - Exact substring that must be present in `source`.
 * @param {string} replacement - Literal text to put in place of `anchor`.
 * @returns {string} The patched text.
 * @throws {Error} When `anchor` does not occur in `source`.
 */
function replaceAnchor(source, anchor, replacement) {
  if (!source.includes(anchor)) {
    throw new Error(
      `build-api: anchor not found in index.js, refusing to emit a broken dist: ${JSON.stringify(anchor)}`
    );
  }
  // A replacer function keeps `replacement` literal: "$&", "$1", "$$" and
  // friends are only interpreted when the replacement is passed as a string.
  return source.replace(anchor, () => replacement);
}

// The ESM glue is published unchanged. The regex rewrite that used to sit here
// escaped backslashes instead of dots and substituted the very same file name,
// so it could never alter the glue. Both wasm file names are shipped below.
fs.writeFileSync(path.join(distEsm, "opencc-wasm.js"), esmGlue, "utf-8");
fs.copyFileSync(path.join(build, "opencc-wasm.cjs"), path.join(distCjs, "opencc-wasm.cjs"));
fs.copyFileSync(path.join(build, "opencc-wasm.wasm"), path.join(dist, "opencc-wasm.wasm"));
fs.copyFileSync(path.join(build, "opencc-wasm.esm.wasm"), path.join(dist, "opencc-wasm.esm.wasm"));

// Copy data folder into dist/data for bundled lookup.
// `force: true` makes rmSync a no-op when dist/data does not exist yet, so no
// separate existence check is needed before clearing the previous copy.
const dataSrc = path.join(root, "data");
const dataDst = path.join(dist, "data");
fs.rmSync(dataDst, { recursive: true, force: true });
fs.cpSync(dataSrc, dataDst, { recursive: true });

// Source API (ESM): re-point the development paths in index.js (which load the
// glue and wasm from build/) at the published dist/ layout, where this file
// lives in dist/esm/ next to opencc-wasm.js and the wasm/data sit one level up.
const srcApiPath = path.join(root, "index.js");
const apiRewrites = [
  [
    'const BASE_URL = new URL("./", import.meta.url);',
    'const BASE_URL = new URL("../", import.meta.url);',
  ],
  [
    'const glueUrl = new URL("build/opencc-wasm.esm.js", pkgBase);',
    'const glueUrl = new URL("./opencc-wasm.js", import.meta.url);',
  ],
  [
    'locateFile: (p) => new URL(`build/${p}`, pkgBase).href',
    'locateFile: (p) => new URL(`../${p}`, import.meta.url).href',
  ],
];
let apiSource = fs.readFileSync(srcApiPath, "utf-8");
for (const [anchor, replacement] of apiRewrites) {
  apiSource = replaceAnchor(apiSource, anchor, replacement);
}
fs.writeFileSync(path.join(distEsm, "index.js"), apiSource, "utf-8");

// CJS wrapper: hand-written small shim that mirrors the ESM API.
+const cjsShim = ` +const fs = require("node:fs"); +const { fileURLToPath } = require("node:url"); +const { default: fetchFn = fetch } = {}; + +const BASE_URL = new (require("node:url").URL)("../", import.meta.url || "file://" + __filename); + +const readFileText = (url) => fs.readFileSync(fileURLToPath(url), "utf-8"); +const readFileBuffer = (url) => fs.readFileSync(fileURLToPath(url)); + +const CONFIG_MAP = { + cn: { t: "s2t.json", tw: "s2tw.json", hk: "s2hk.json", cn: null }, + tw: { cn: "tw2s.json", t: "tw2t.json", tw: null }, + hk: { cn: "hk2s.json", t: "hk2t.json", hk: null }, + t: { cn: "t2s.json", tw: "t2tw.json", hk: "t2hk.json", jp: "t2jp.json", t: null }, + jp: { t: "jp2t.json" }, +}; + +const loadedConfigs = new Set(); +const loadedDicts = new Set(); +const handles = new Map(); +let modulePromise = null; +let api = null; + +async function getModule() { + if (!modulePromise) { + const wasmUrl = new URL("./opencc-wasm.cjs", import.meta.url || "file://" + __filename); + const create = require(wasmUrl); + modulePromise = create(); + } + return modulePromise; +} + +async function getApi() { + const mod = await getModule(); + if (!api) { + api = { + create: mod.cwrap("opencc_create", "number", ["string"]), + convert: mod.cwrap("opencc_convert", "string", ["number", "string"]), + destroy: mod.cwrap("opencc_destroy", null, ["number"]), + }; + } + return { mod, api }; +} + +function collectOcd2Files(node, acc) { + if (!node || typeof node !== "object") return; + if (node.type === "ocd2" && node.file) acc.add(node.file); + if (node.type === "group" && Array.isArray(node.dicts)) { + node.dicts.forEach((d) => collectOcd2Files(d, acc)); + } +} + +async function fetchText(urlObj) { + if (urlObj.protocol === "file:") return readFileText(urlObj); + const resp = await fetch(urlObj.href); + if (!resp.ok) throw new Error("Fetch " + urlObj + " failed: " + resp.status); + return resp.text(); +} +async function fetchBuffer(urlObj) { + if (urlObj.protocol === "file:") return 
new Uint8Array(readFileBuffer(urlObj)); + const resp = await fetch(urlObj.href); + if (!resp.ok) throw new Error("Fetch " + urlObj + " failed: " + resp.status); + return new Uint8Array(await resp.arrayBuffer()); +} + +async function ensureConfig(configName) { + if (handles.has(configName)) return handles.get(configName); + const { mod, api: apiFns } = await getApi(); + mod.FS.mkdirTree("/data/config"); + mod.FS.mkdirTree("/data/dict"); + const cfgUrl = new URL("../data/config/" + configName, BASE_URL); + const cfgJson = JSON.parse(await fetchText(cfgUrl)); + + const dicts = new Set(); + collectOcd2Files(cfgJson.segmentation?.dict, dicts); + if (Array.isArray(cfgJson.conversion_chain)) { + cfgJson.conversion_chain.forEach((item) => collectOcd2Files(item?.dict, dicts)); + } + for (const file of dicts) { + if (loadedDicts.has(file)) continue; + const dictUrl = new URL("../data/dict/" + file, BASE_URL); + const buf = await fetchBuffer(dictUrl); + mod.FS.writeFile("/data/dict/" + file, buf); + loadedDicts.add(file); + } + const patchPaths = (node) => { + if (!node || typeof node !== "object") return; + if (node.type === "ocd2" && node.file) node.file = "/data/dict/" + node.file; + if (node.type === "group" && Array.isArray(node.dicts)) node.dicts.forEach(patchPaths); + }; + patchPaths(cfgJson.segmentation?.dict); + if (Array.isArray(cfgJson.conversion_chain)) { + cfgJson.conversion_chain.forEach((item) => patchPaths(item?.dict)); + } + mod.FS.writeFile("/data/config/" + configName, JSON.stringify(cfgJson)); + loadedConfigs.add(configName); + + const handle = apiFns.create("/data/config/" + configName); + if (!handle || handle < 0) throw new Error("opencc_create failed for " + configName); + handles.set(configName, handle); + return handle; +} + +function resolveConfig(from, to) { + const f = (from || "").toLowerCase(); + const t = (to || "").toLowerCase(); + const m = CONFIG_MAP[f]; + if (!m || !(t in m)) throw new Error("Unsupported conversion from '" + from + "' to '" 
+ to + "'"); + return m[t]; +} + +function createConverter({ from, to, config }) { + const configName = config || resolveConfig(from, to); + return async (text) => { + if (configName === null) return text; + const handle = await ensureConfig(configName); + const { api: apiFns } = await getApi(); + return apiFns.convert(handle, text); + }; +} + +function CustomConverter(dictOrString) { + let pairs = []; + if (typeof dictOrString === "string") { + pairs = dictOrString + .split("|") + .map((seg) => seg.trim()) + .filter(Boolean) + .map((seg) => seg.split(/\\s+/)) + .filter((arr) => arr.length >= 2) + .map(([a, b]) => [a, b]); + } else if (Array.isArray(dictOrString)) { + pairs = dictOrString; + } + pairs.sort((a, b) => b[0].length - a[0].length); + return (text) => { + let out = text; + for (const [src, dst] of pairs) { + out = out.split(src).join(dst); + } + return out; + }; +} + +function ConverterFactory(fromLocale, toLocale, extraDicts = []) { + const conv = createConverter({ from: fromLocale, to: toLocale }); + const extras = extraDicts.map((d) => CustomConverter(d)); + return async (text) => { + let result = await conv(text); + extras.forEach((fn) => { + result = fn(result); + }); + return result; + }; +} + +const OpenCC = { + Converter(opts) { + const fn = createConverter(opts); + return (text) => fn(text); + }, + CustomConverter, + ConverterFactory, + Locale: { + from: { cn: "cn", tw: "t", hk: "hk", jp: "jp", t: "t" }, + to: { cn: "cn", tw: "tw", hk: "hk", jp: "jp", t: "t" }, + }, +}; + +module.exports = OpenCC; +module.exports.default = OpenCC; +`; + +fs.writeFileSync(path.join(distCjs, "index.cjs"), cjsShim, "utf-8"); + +console.log("API wrappers built: dist/esm/index.js and dist/cjs/index.cjs"); diff --git a/wasm-lib/scripts/refresh_assets.sh b/wasm-lib/scripts/refresh_assets.sh new file mode 100755 index 000000000..9d434c662 --- /dev/null +++ b/wasm-lib/scripts/refresh_assets.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Regenerate 
#!/usr/bin/env bash
set -euo pipefail

# Regenerate wasm-lib assets from Bazel outputs:
#   - data/dictionary/*.ocd2           -> wasm-lib/data/dict/
#   - test/testcases/testcases.json    -> wasm-lib/test/testcases.json
#
# Only dictionaries referenced by data/config/*.json are copied; if the scan
# finds none, every built .ocd2 is copied instead.
# Requires bazel on PATH. Takes no arguments.

# ROOT is the directory above this script's directory; the repository root is
# one level above ROOT, and all relative paths below are resolved from there.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${ROOT}/.."

echo "Building dictionaries via Bazel..."
bazel build //data/dictionary:binary_dictionaries

BAZEL_BIN="$(bazel info bazel-bin)"

DICT_SRC="${BAZEL_BIN}/data/dictionary"
DICT_DST="${ROOT}/data/dict"
mkdir -p "${DICT_DST}"
# Ensure target writable (some checked-in artifacts may be read-only)
chmod -R u+w "${DICT_DST}"

echo "Collecting required .ocd2 names from data/config/*.json"
NEEDED_DICTS=()
# POSIX classes and single backslashes: the patterns are single-quoted, so a
# doubled backslash would reach the tool as a literal backslash and never match.
# -h keeps file names out of the output, -o prints only the matching part.
while IFS= read -r line; do
  if [[ -n "${line}" ]]; then
    NEEDED_DICTS+=("${line}")
  fi
done < <(
  grep -ohE '"file"[[:space:]]*:[[:space:]]*"[^"]+\.ocd2"' data/config/*.json \
    | sed -E 's/.*"file"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' \
    | sort -u
)

# If no matches (unexpected), fall back to all .ocd2
if [[ ${#NEEDED_DICTS[@]} -eq 0 ]]; then
  echo "No referenced dicts found via config scan; copying all .ocd2"
  for dict_path in "${DICT_SRC}"/*.ocd2; do
    # An unmatched glob stays literal; skip it instead of recording a bogus name.
    [[ -e "${dict_path}" ]] || continue
    NEEDED_DICTS+=("${dict_path##*/}")
  done
fi

if [[ ${#NEEDED_DICTS[@]} -eq 0 ]]; then
  echo "error: no .ocd2 dictionaries found in ${DICT_SRC}" >&2
  exit 1
fi

# Verify every source before deleting anything, so a stale reference in a
# config cannot leave the destination directory half-populated.
for f in "${NEEDED_DICTS[@]}"; do
  if [[ ! -f "${DICT_SRC}/${f}" ]]; then
    echo "error: dictionary ${f} is referenced by a config but missing from ${DICT_SRC}" >&2
    exit 1
  fi
done

echo "Refreshing dicts in ${DICT_DST}"
rm -f "${DICT_DST}"/*.ocd2
for f in "${NEEDED_DICTS[@]}"; do
  install -m 644 "${DICT_SRC}/${f}" "${DICT_DST}/${f}"
done

CASE_SRC="${ROOT}/../test/testcases/testcases.json"
CASE_DST="${ROOT}/test/testcases.json"
mkdir -p "$(dirname "${CASE_DST}")"
# Remove any old JSON to avoid stale copies
rm -f "${CASE_DST}"
echo "Copying testcases.json from ${CASE_SRC} -> ${CASE_DST}"
install -m 644 "${CASE_SRC}" "${CASE_DST}"

echo "Done."
diff --git a/wasm-lib/src/main.cpp b/wasm-lib/src/main.cpp new file mode 100644 index 000000000..4b25d8ff8 --- /dev/null +++ b/wasm-lib/src/main.cpp @@ -0,0 +1,81 @@ +#include +#include + +#include +#ifdef OPENCC_WASM_WITH_OPENCC +#include "../src/opencc.h" +#endif + +struct Converter { + opencc_t oc; + std::string out; +}; + +static std::unordered_map converters; +static int next_id = 1; + +static const char* throw_error(const char* msg) { + emscripten_throw_string(msg); + return msg; +} + +extern "C" { + +int opencc_create(const char* configPath) { +#ifdef OPENCC_WASM_WITH_OPENCC + if (configPath == nullptr) { + throw_error("opencc_create: null configPath"); + return -1; + } + opencc_t oc = opencc_open(configPath); + if (oc == (opencc_t)-1) { + throw_error("opencc_create: opencc_open failed"); + return -1; + } + int id = next_id++; + converters.emplace(id, Converter{oc, std::string()}); + return id; +#else + (void)configPath; + throw_error("opencc_create: OPENCC_WASM_WITH_OPENCC not enabled"); + return -1; +#endif +} + +const char* opencc_convert(int handle, const char* input) { + static std::string err; +#ifdef OPENCC_WASM_WITH_OPENCC + if (input == nullptr) { + return throw_error("opencc_convert: null input"); + } + auto it = converters.find(handle); + if (it == converters.end()) { + return throw_error("opencc_convert: invalid handle"); + } + char* converted = opencc_convert_utf8(it->second.oc, input, (size_t)-1); + if (converted != nullptr) { + it->second.out.assign(converted); + opencc_convert_utf8_free(converted); + return it->second.out.c_str(); + } + return throw_error("opencc_convert: conversion returned null"); +#else + (void)handle; + (void)input; + return throw_error("opencc_convert: OPENCC_WASM_WITH_OPENCC not enabled"); +#endif +} + +void opencc_destroy(int handle) { +#ifdef OPENCC_WASM_WITH_OPENCC + auto it = converters.find(handle); + if (it != converters.end()) { + opencc_close(it->second.oc); + converters.erase(it); + } +#else + (void)handle; 
// Data-driven conversion tests: every (config, expected) entry of every case in
// testcases.json becomes one node:test test.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const casesPath = path.join(__dirname, "testcases.json");
const parsed = JSON.parse(fs.readFileSync(casesPath, "utf-8"));
const cases = parsed?.cases || [];

// One converter per config file, shared by all tests that use that config.
const converterCache = new Map();

/**
 * Returns the cached converter for a config file name, creating it on first use.
 * @param {string} config Config file name, e.g. "s2t.json".
 * @returns {(text: string) => Promise<string>}
 */
function getConverter(config) {
  if (!converterCache.has(config)) {
    converterCache.set(config, OpenCC.Converter({ config }));
  }
  return converterCache.get(config);
}

// Without this guard an empty or malformed testcases.json registers no tests
// and the suite passes vacuously.
test("testcases.json provides at least one case", () => {
  assert.ok(cases.length > 0, "No testcases found");
});

cases.forEach((tc, idx) => {
  // Cases without an expectation map have nothing to assert.
  if (!tc.expected || typeof tc.expected !== "object") return;
  for (const [cfg, expected] of Object.entries(tc.expected)) {
    const configName = `${cfg}.json`;
    const label = tc.id ? ` (${tc.id})` : "";
    test(`[${configName}] case #${idx + 1}${label}`, async () => {
      const convert = getConverter(configName);
      const actual = await convert(tc.input);
      assert.strictEqual(actual, expected);
    });
  }
});
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Maximum number of failing cases printed in detail.
const MAX_REPORTED_FAILURES = 5;

/**
 * Runs every (config, expected) pair from testcases.json through the converter
 * and prints a summary.
 *
 * Exits with status 1 immediately when no cases are found; sets
 * process.exitCode to 1 when any case fails. A conversion that throws is
 * recorded as a failure of that case, so the remaining cases still run and the
 * summary is still printed.
 */
async function main() {
  const casesPath = path.join(__dirname, "testcases.json");
  const parsed = JSON.parse(fs.readFileSync(casesPath, "utf-8"));
  const cases = parsed?.cases || [];
  if (cases.length === 0) {
    console.error("No testcases found");
    process.exit(1);
  }

  // One converter per config file, reused across cases.
  const converters = new Map();
  let total = 0;
  let passed = 0;
  const failed = [];

  for (const tc of cases) {
    if (!tc.expected || typeof tc.expected !== "object") continue;
    for (const [cfg, expected] of Object.entries(tc.expected)) {
      const configName = `${cfg}.json`;
      total++;
      let actual;
      try {
        if (!converters.has(configName)) {
          converters.set(configName, OpenCC.Converter({ config: configName }));
        }
        actual = await converters.get(configName)(tc.input);
      } catch (err) {
        const reason = err && err.message ? err.message : String(err);
        failed.push({ ...tc, config: configName, expected, actual: `<threw: ${reason}>` });
        continue;
      }
      if (actual === expected) {
        passed++;
      } else {
        failed.push({ ...tc, config: configName, expected, actual });
      }
    }
  }

  console.log(`Total: ${total}, Passed: ${passed}, Failed: ${failed.length}`);
  if (failed.length) {
    failed.slice(0, MAX_REPORTED_FAILURES).forEach((f, idx) => {
      console.log(
        `FAIL #${idx} [${f.config}]\n  input:  ${f.input}\n  expect: ${f.expected}\n  actual: ${f.actual}`
      );
    });
    if (failed.length > MAX_REPORTED_FAILURES) {
      console.log(`... and ${failed.length - MAX_REPORTED_FAILURES} more failure(s) not shown`);
    }
    process.exitCode = 1;
  }
}

// Anything main() itself cannot handle (unreadable file, invalid JSON) is fatal.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
index 000000000..98c0e3515 --- /dev/null +++ b/wasm-lib/test/testcases.json @@ -0,0 +1,372 @@ +{ + "cases": [ + { + "id": "case_001", + "input": "虛偽歎息", + "expected": { + "hk2s": "虚伪叹息" + } + }, + { + "id": "case_002", + "input": "潮濕灶台", + "expected": { + "hk2s": "潮湿灶台" + } + }, + { + "id": "case_003", + "input": "讚歎沙河涌洶湧的波浪", + "expected": { + "hk2s": "赞叹沙河涌汹涌的波浪" + } + }, + { + "id": "case_004", + "input": "為賦新詞強説愁", + "expected": { + "hk2t": "爲賦新詞強說愁" + } + }, + { + "id": "case_005", + "input": "想到自己一緊張就口吃,我就沒胃口吃飯", + "expected": { + "hk2t": "想到自己一緊張就口吃,我就沒胃口喫飯", + "tw2t": "想到自己一緊張就口吃,我就沒胃口喫飯" + } + }, + { + "id": "case_006", + "input": "旧字体歴史仮名遣 新字体現代仮名遣", + "expected": { + "jp2t": "舊字體歷史假名遣 新字體現代假名遣" + } + }, + { + "id": "case_007", + "input": "横浜 糸魚川 伊予国", + "expected": { + "jp2t": "橫濱 絲魚川 伊豫國" + } + }, + { + "id": "case_008", + "input": "駅弁当 弁別 弁護士 弁膜", + "expected": { + "jp2t": "驛辨當 辨別 辯護士 瓣膜" + } + }, + { + "id": "case_009", + "input": "芸術 欠航 欠缺 飲料缶", + "expected": { + "jp2t": "藝術 缺航 欠缺 飲料罐" + } + }, + { + "id": "case_010", + "input": "虚伪叹息", + "expected": { + "s2hk": "虛偽嘆息" + } + }, + { + "id": "case_011", + "input": "潮湿灶台", + "expected": { + "s2hk": "潮濕灶台" + } + }, + { + "id": "case_012", + "input": "赞叹沙河涌汹涌的波浪", + "expected": { + "s2hk": "讚歎沙河涌洶湧的波浪" + } + }, + { + "id": "case_013", + "input": "为了核实这说法", + "expected": { + "s2hk": "為了核實這説法" + } + }, + { + "id": "case_014", + "input": "高剂量的苦瓜素还会抑制胚胎发育", + "expected": { + "s2hk": "高劑量的苦瓜素還會抑制胚胎發育", + "s2t": "高劑量的苦瓜素還會抑制胚胎發育", + "s2tw": "高劑量的苦瓜素還會抑制胚胎發育", + "s2twp": "高劑量的苦瓜素還會抑制胚胎發育" + } + }, + { + "id": "case_015", + "input": "夸夸其谈 夸父逐日", + "expected": { + "s2t": "誇誇其談 夸父逐日" + } + }, + { + "id": "case_016", + "input": "我干什么不干你事。", + "expected": { + "s2t": "我幹什麼不干你事。" + } + }, + { + "id": "case_017", + "input": "太后的头发很干燥。", + "expected": { + "s2t": "太后的頭髮很乾燥。" + } + }, + { + "id": "case_018", + "input": "燕燕于飞,差池其羽。之子于归,远送于野。", + "expected": { + "s2t": "燕燕于飛,差池其羽。之子于歸,遠送於野。" + } + }, + { + "id": "case_019", 
+ "input": "请成相,世之殃,愚暗愚暗堕贤良。人主无贤,如瞽无相何伥伥!请布基,慎圣人,愚而自专事不治。主忌苟胜,群臣莫谏必逢灾。", + "expected": { + "s2t": "請成相,世之殃,愚闇愚闇墮賢良。人主無賢,如瞽無相何倀倀!請布基,慎聖人,愚而自專事不治。主忌苟勝,羣臣莫諫必逢災。" + } + }, + { + "id": "case_020", + "input": "曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。", + "expected": { + "s2t": "曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。" + } + }, + { + "id": "case_021", + "input": "新的理论被发现了。", + "expected": { + "s2t": "新的理論被發現了。" + } + }, + { + "id": "case_022", + "input": "金胄不是金色的甲胄。", + "expected": { + "s2t": "金胄不是金色的甲冑。" + } + }, + { + "id": "case_023", + "input": "经理发现后劝谕两人", + "expected": { + "s2t": "經理發現後勸諭兩人" + } + }, + { + "id": "case_024", + "input": "想到自己一紧张就口吃,我就没胃口吃饭", + "expected": { + "s2t": "想到自己一緊張就口吃,我就沒胃口喫飯" + } + }, + { + "id": "case_025", + "input": "恒指最新消息,恒生指数跌破 2 万点", + "expected": { + "s2t": "恒指最新消息,恒生指數跌破 2 萬點" + } + }, + { + "id": "case_026", + "input": "恒生银行和恒大集团发布财报", + "expected": { + "s2t": "恒生銀行和恒大集團發佈財報" + } + }, + { + "id": "case_027", + "input": "着装污染虚伪发泄棱柱群众里面", + "expected": { + "s2tw": "著裝汙染虛偽發洩稜柱群眾裡面" + } + }, + { + "id": "case_028", + "input": "鲶鱼和鲇鱼是一种生物。", + "expected": { + "s2tw": "鯰魚和鯰魚是一種生物。" + } + }, + { + "id": "case_029", + "input": "鼠标里面的硅二极管坏了,导致光标分辨率降低。", + "expected": { + "s2twp": "滑鼠裡面的矽二極體壞了,導致游標解析度降低。" + } + }, + { + "id": "case_030", + "input": "我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。", + "expected": { + "s2twp": "我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。" + } + }, + { + "id": "case_031", + "input": "为什么你在床里面睡着?", + "expected": { + "s2twp": "為什麼你在床裡面睡著?" 
+ } + }, + { + "id": "case_032", + "input": "海内存知己", + "expected": { + "s2twp": "海內存知己" + } + }, + { + "id": "case_033", + "input": "摩尔线程", + "expected": { + "s2twp": "摩爾線程" + } + }, + { + "id": "case_034", + "input": "字节跳动", + "expected": { + "s2twp": "字節跳動" + } + }, + { + "id": "case_035", + "input": "潮溼的露臺", + "expected": { + "t2hk": "潮濕的露台" + } + }, + { + "id": "case_036", + "input": "爲了覈實這說法", + "expected": { + "t2hk": "為了核實這説法" + } + }, + { + "id": "case_037", + "input": "包糉子活動告一段落", + "expected": { + "t2hk": "包粽子活動告一段落" + } + }, + { + "id": "case_038", + "input": "舊字體歷史假名遣 新字體現代假名遣", + "expected": { + "t2jp": "旧字体歴史仮名遣 新字体現代仮名遣" + } + }, + { + "id": "case_039", + "input": "橫濱 絲魚川 伊豫國", + "expected": { + "t2jp": "横浜 糸魚川 伊予国" + } + }, + { + "id": "case_040", + "input": "驛辨當 辨別 辯護士 瓣膜", + "expected": { + "t2jp": "駅弁当 弁別 弁護士 弁膜" + } + }, + { + "id": "case_041", + "input": "藝術 缺航 飲料罐", + "expected": { + "t2jp": "芸術 欠航 飲料缶" + } + }, + { + "id": "case_042", + "input": "曾經有一份真誠的愛情放在我面前,我沒有珍惜,等我失去的時候我才後悔莫及。人事間最痛苦的事莫過於此。如果上天能夠給我一個再來一次得機會,我會對那個女孩子說三個字,我愛你。如果非要在這份愛上加個期限,我希望是,一萬年。", + "expected": { + "t2s": "曾经有一份真诚的爱情放在我面前,我没有珍惜,等我失去的时候我才后悔莫及。人事间最痛苦的事莫过于此。如果上天能够给我一个再来一次得机会,我会对那个女孩子说三个字,我爱你。如果非要在这份爱上加个期限,我希望是,一万年。" + } + }, + { + "id": "case_043", + "input": "二噁英", + "expected": { + "t2s": "二𫫇英" + } + }, + { + "id": "case_044", + "input": "著裝著作汙染虛偽發洩稜柱群眾裡面", + "expected": { + "tw2s": "着装著作污染虚伪发泄棱柱群众里面" + } + }, + { + "id": "case_045", + "input": "滑鼠裡面的矽二極體壞了,導致游標解析度降低。", + "expected": { + "tw2sp": "鼠标里面的硅二极管坏了,导致光标分辨率降低。" + } + }, + { + "id": "case_046", + "input": "我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。", + "expected": { + "tw2sp": "我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。" + } + }, + { + "id": "case_047", + "input": "為什麼你在床裡面睡著?", + "expected": { + "tw2sp": "为什么你在床里面睡着?" 
+ } + }, + { + "id": "case_048", + "input": "用滑鼠點選正規表示式", + "expected": { + "tw2sp": "用鼠标点击正则表达式" + } + }, + { + "id": "case_049", + "input": "KB大橋也被視為帛琉人的後花園", + "expected": { + "tw2sp": "KB大桥也被视为帕劳人的后花园" + } + }, + { + "id": "case_050", + "input": "這個軟體裡有一套軟體動物的資料庫", + "expected": { + "tw2sp": "这个软件里有一套软体动物的数据库" + } + }, + { + "id": "case_051", + "input": "為了眾人化妝床頭裡面衛生,醞釀群峰鐵鉤嘆氣事件", + "expected": { + "tw2t": "爲了衆人化妝牀頭裏面衛生,醞釀羣峯鐵鉤嘆氣事件" + } + }, + { + "id": "case_052", + "input": "在廚房裡做手擀麵", + "expected": { + "tw2t": "在廚房裏做手擀麪" + } + } + ] +}