
Commit efb83b0

Authored by Retribution98, Copilot and almilosz
[JS] Update tokenizer methods (#3012)
## Description

- Use `Tensor` from openvino-node in `Tokenizer` `encode()` and `decode()`
- Extended the `Tensor` API and added constructors for `Tensor`
- Moved `Tokenizer` to a separate TS file
- Use `BigInt` for token ids
- Store the openvino-node addon in `AddonData` so the native layer can work with entities from the core package
- Aligned tests with the new API to verify tokenizer and binding behavior
- Updated the benchmark sample to use `Tensor`-based encoding
- Updated the documentation: https://retribution98.github.io/openvino.genai/

## Ticket

CVS-174909

## Checklist:

- [x] Tests have been updated or added to cover the new code.
- [x] This patch fully addresses the ticket.
- [x] I have made corresponding changes to the documentation.

---------

Signed-off-by: Kirill Suvorov <[email protected]>
Co-authored-by: Copilot <[email protected]>
Co-authored-by: Alicja Miloszewska <[email protected]>
1 parent df1c52d commit efb83b0
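A minimal sketch of the updated public API (the local model path is hypothetical, and passing the `input_ids` tensor back into `decode()` is an assumption based on the description; the confirmed `encode()` options are in the docs changes below):

```js
import { LLMPipeline } from 'openvino-genai-node';

const pipe = await LLMPipeline('./model_dir', 'CPU'); // hypothetical model directory
const tokenizer = pipe.getTokenizer();

// encode() now returns openvino-node Tensors for input_ids and attention_mask.
const tokens = tokenizer.encode('The Sun is yellow because');
console.log(tokens.input_ids.getShape()); // e.g. [1, 6]

// Token ids are BigInt values after this change; decode() is assumed
// to accept the ids tensor and return the decoded string(s).
console.log(tokenizer.decode(tokens.input_ids));
```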

File tree

17 files changed: +855 −51 lines changed


samples/js/text_generation/benchmark_genai.js

Lines changed: 4 additions & 0 deletions
```diff
@@ -90,6 +90,10 @@ async function main() {
     pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig });
   }
 
+  const inputData = await pipe.getTokenizer().encode(prompt);
+  const promptTokenSize = inputData.input_ids.getShape()[1];
+  console.log(`Prompt token size: ${promptTokenSize}`);
+
   for (let i = 0; i < numWarmup; i++) {
     await pipe.generate(prompt, config);
   }
```
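Here `input_ids` is a 2-D tensor of shape `[batch, seq_len]` (see the shapes in the tokenization guide below), so `getShape()[1]` is the prompt length in tokens.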

site/docs/bindings/node-js.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -24,6 +24,12 @@ Node.js bindings currently support:
 - Structured output
 - ReAct agent support
 - `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications
+- `Tokenizer`: Fast tokenization / detokenization and chat prompt formatting
+  - Encode strings into token id and attention mask tensors
+  - Decode token sequences
+  - Apply chat template
+  - Access special tokens (BOS/EOS/PAD)
+  - Supports paired input
 
 ## Installation
 
```
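A sketch of the capabilities this list advertises, using the accessor names from the previously inlined `Tokenizer` interface (removed from `llmPipeline.ts` below); the model path is hypothetical:

```js
import { Tokenizer } from 'openvino-genai-node';

const tokenizer = new Tokenizer('./model_dir'); // hypothetical model directory

// Special-token accessors (BOS/EOS/PAD).
console.log(tokenizer.getBosToken(), tokenizer.getBosTokenId());
console.log(tokenizer.getEosToken(), tokenizer.getEosTokenId());
console.log(tokenizer.getPadToken(), tokenizer.getPadTokenId());

// Chat prompt formatting via the model's chat template.
const prompt = tokenizer.applyChatTemplate(
  [{ role: 'user', content: 'Why is the Sun yellow?' }],
  true, // addGenerationPrompt
);
```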
site/docs/guides/tokenization.mdx

Lines changed: 55 additions & 0 deletions
````diff
@@ -34,6 +34,20 @@ It can be initialized from the path, in-memory IR representation or obtained fro
     auto tokenzier = pipe.get_tokenizer();
     ```
   </TabItemCpp>
+  <TabItemJS>
+    ```js
+    import { LLMPipeline, Tokenizer } from 'openvino-genai-node';
+
+    let tokenizer;
+
+    // Initialize from the path
+    tokenizer = new Tokenizer(models_path);
+
+    // Or get the tokenizer instance from an LLMPipeline
+    const pipe = await LLMPipeline(models_path, "CPU");
+    tokenizer = pipe.getTokenizer();
+    ```
+  </TabItemJS>
 </LanguageTabs>
 
 `Tokenizer` has `encode()` and `decode()` methods which support the following arguments: `add_special_tokens`, `skip_special_tokens`, `pad_to_max_length`, `max_length`.
@@ -51,6 +65,11 @@ It can be initialized from the path, in-memory IR representation or obtained fro
     auto tokens = tokenizer.encode("The Sun is yellow because", ov::genai::add_special_tokens(false));
     ```
   </TabItemCpp>
+  <TabItemJS>
+    ```js
+    const tokens = tokenizer.encode("The Sun is yellow because", { add_special_tokens: false });
+    ```
+  </TabItemJS>
 </LanguageTabs>
 
 The `encode()` method returns a [`TokenizedInputs`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.TokenizedInputs.html) object containing `input_ids` and `attention_mask`, both stored as `ov::Tensor`.
@@ -121,4 +140,40 @@ If `pad_to_max_length` is set to true, then instead of padding to the longest se
     // out_shape: [1, 128]
     ```
   </TabItemCpp>
+  <TabItemJS>
+    ```js
+    import { Tokenizer } from 'openvino-genai-node';
+
+    const tokenizer = new Tokenizer(models_path);
+    const prompts = ["The Sun is yellow because", "The"];
+    let tokens;
+
+    // Since the prompts are shorter than the maximal length (taken from the IR), it does not affect the shape.
+    // The resulting shape is defined by the length of the longest token sequence.
+    // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="longest", truncation=True)
+    tokens = tokenizer.encode(prompts);
+    // which is equivalent to
+    tokens = tokenizer.encode(prompts, { pad_to_max_length: false });
+    console.log(tokens.input_ids.getShape());
+    // out_shape: [2, 6]
+
+    // The resulting tokens tensor will be padded to 1024; sequences which exceed this length will be truncated.
+    // Equivalent of HuggingFace hf_tokenizer.encode(prompt, padding="max_length", truncation=True, max_length=1024)
+    tokens = tokenizer.encode([
+      "The Sun is yellow because",
+      "The",
+      "The longest string ever".repeat(2000),
+    ], {
+      pad_to_max_length: true,
+      max_length: 1024,
+    });
+    console.log(tokens.input_ids.getShape());
+    // out_shape: [3, 1024]
+
+    // For single string prompts, truncation and padding are also applied.
+    tokens = tokenizer.encode("The Sun is yellow because", { pad_to_max_length: true, max_length: 128 });
+    console.log(tokens.input_ids.getShape());
+    // out_shape: [1, 128]
+    ```
+  </TabItemJS>
 </LanguageTabs>
````
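The guide lists `skip_special_tokens` among the supported `decode()` arguments; a sketch of the decode side (feeding the `input_ids` tensor back into `decode()` is an assumption consistent with the PR description):

```js
import { Tokenizer } from 'openvino-genai-node';

const tokenizer = new Tokenizer('./model_dir'); // hypothetical model directory
const tokens = tokenizer.encode("The Sun is yellow because");
const text = tokenizer.decode(tokens.input_ids, { skip_special_tokens: true });
console.log(text);
```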

src/js/eslint.config.cjs

Lines changed: 7 additions & 0 deletions
```diff
@@ -53,6 +53,13 @@ module.exports = defineConfig([
           "json_schema",
           "structured_output_config",
           "structural_tags_config",
+          "skip_special_tokens",
+          "add_special_tokens",
+          "pad_to_max_length",
+          "max_length",
+          "padding_side",
+          "add_second_input",
+          "number_of_inputs",
         ],
       },
     ],
```
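These additions extend what is evidently a snake_case allowlist for the naming-convention rule: the new encode/decode option names mirror the C++/Python API and would otherwise trip a camelCase convention.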

src/js/include/addon.hpp

Lines changed: 1 addition & 0 deletions
```diff
@@ -12,6 +12,7 @@ struct AddonData {
     Napi::FunctionReference tokenizer;
     Napi::FunctionReference perf_metrics;
     Napi::FunctionReference chat_history;
+    Napi::ObjectReference openvino_addon;
};

void init_class(Napi::Env env,
```
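The new `openvino_addon` reference keeps the openvino-node addon's exports in the GenAI addon's per-instance data, so the native layer can look up core-class prototypes (see `get_prototype_from_ov_addon` in `helper.hpp` below) when wrapping and unwrapping tensors.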

src/js/include/helper.hpp

Lines changed: 14 additions & 0 deletions
```diff
@@ -37,9 +37,13 @@ ov::AnyMap js_to_cpp<ov::AnyMap>(const Napi::Env& env, const Napi::Value& value)
 /** @brief A template specialization for TargetType std::string */
 template <>
 std::string js_to_cpp<std::string>(const Napi::Env& env, const Napi::Value& value);
+template <>
+int64_t js_to_cpp<int64_t>(const Napi::Env& env, const Napi::Value& value);
 /** @brief A template specialization for TargetType std::vector<std::string> */
 template <>
 std::vector<std::string> js_to_cpp<std::vector<std::string>>(const Napi::Env& env, const Napi::Value& value);
+template <>
+std::vector<int64_t> js_to_cpp<std::vector<int64_t>>(const Napi::Env& env, const Napi::Value& value);
 /** @brief A template specialization for TargetType GenerateInputs */
 template <>
 GenerateInputs js_to_cpp<GenerateInputs>(const Napi::Env& env, const Napi::Value& value);
@@ -58,6 +62,8 @@ ov::genai::StructuredOutputConfig::Tag js_to_cpp<ov::genai::StructuredOutputConf
 /** @brief A template specialization for TargetType ov::genai::StructuredOutputConfig::StructuralTag */
 template <>
 ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp<ov::genai::StructuredOutputConfig::StructuralTag>(const Napi::Env& env, const Napi::Value& value);
+template <>
+ov::Tensor js_to_cpp<ov::Tensor>(const Napi::Env& env, const Napi::Value& value);
 /**
  * @brief Unwraps a C++ object from a JavaScript wrapper.
  * @tparam TargetType The C++ class type to extract.
@@ -110,6 +116,12 @@ Napi::Value cpp_to_js<std::vector<size_t>, Napi::Value>(const Napi::Env& env, co
 
 template <>
 Napi::Value cpp_to_js<ov::genai::JsonContainer, Napi::Value>(const Napi::Env& env, const ov::genai::JsonContainer& json_container);
+
+template <>
+Napi::Value cpp_to_js<ov::Tensor, Napi::Value>(const Napi::Env& env, const ov::Tensor& tensor);
+
+template <>
+Napi::Value cpp_to_js<ov::genai::TokenizedInputs, Napi::Value>(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs);
 /**
  * @brief Template function to convert C++ map into Javascript Object. Map key must be std::string.
@@ -130,3 +142,5 @@ bool is_chat_history(const Napi::Env& env, const Napi::Value& value);
 std::string json_stringify(const Napi::Env& env, const Napi::Value& value);
 
 Napi::Value json_parse(const Napi::Env& env, const std::string& value);
+
+Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name);
```
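On the JS side, the `cpp_to_js<ov::genai::TokenizedInputs>` conversion surfaces as a plain object holding two openvino-node tensors; a sketch of the expected shape, with field names taken from the docs above (the attention-mask comment is illustrative):

```js
import { Tokenizer } from 'openvino-genai-node';

const tokenizer = new Tokenizer('./model_dir'); // hypothetical model directory
const { input_ids, attention_mask } = tokenizer.encode(["The Sun is yellow because", "The"]);
console.log(input_ids.getShape());      // e.g. [2, 6]
console.log(attention_mask.getShape()); // same shape; zeros mark padded positions
```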

src/js/include/tokenizer.hpp

Lines changed: 6 additions & 0 deletions
```diff
@@ -15,6 +15,12 @@ class TokenizerWrapper : public Napi::ObjectWrap<TokenizerWrapper> {
     Napi::Value get_eos_token_id(const Napi::CallbackInfo& info);
     Napi::Value get_pad_token(const Napi::CallbackInfo& info);
     Napi::Value get_pad_token_id(const Napi::CallbackInfo& info);
+    Napi::Value get_chat_template(const Napi::CallbackInfo& info);
+    Napi::Value get_original_chat_template(const Napi::CallbackInfo& info);
+    Napi::Value set_chat_template(const Napi::CallbackInfo& info);
+    Napi::Value supports_paired_input(const Napi::CallbackInfo& info);
+    Napi::Value encode(const Napi::CallbackInfo& info);
+    Napi::Value decode(const Napi::CallbackInfo& info);
 private:
     ov::genai::Tokenizer _tokenizer;
 };
```
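Assuming the binding layer's usual snake_case-to-camelCase mapping, the new native methods would surface on the JS `Tokenizer` roughly as follows (the JS-side names here are an assumption, not confirmed by this diff):

```js
import { Tokenizer } from 'openvino-genai-node';

const tokenizer = new Tokenizer('./model_dir'); // hypothetical model directory
// Hypothetical camelCase projections of the new native methods.
const template = tokenizer.getChatTemplate();
tokenizer.setChatTemplate(template);          // e.g. restore or customize the template
console.log(tokenizer.supportsPairedInput()); // whether paired (two-string) input is supported
```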

src/js/lib/addon.ts

Lines changed: 7 additions & 2 deletions
```diff
@@ -2,6 +2,8 @@ import { createRequire } from "module";
 import { platform } from "node:os";
 import { join, dirname, resolve } from "node:path";
 import type { ChatHistory as IChatHistory } from "./chatHistory.js";
+import type { Tokenizer as ITokenizer } from "./tokenizer.js";
+import { addon as ovAddon } from "openvino-node";
 
 export type EmbeddingResult = Float32Array | Int8Array | Uint8Array;
 export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[];
@@ -60,6 +62,8 @@ interface OpenVINOGenAIAddon {
   TextEmbeddingPipeline: TextEmbeddingPipelineWrapper;
   LLMPipeline: any;
   ChatHistory: IChatHistory;
+  Tokenizer: ITokenizer;
+  setOpenvinoAddon: (ovAddon: any) => void;
 }
 
 // We need to use delayed import to get an updated Path if required
@@ -78,7 +82,8 @@ function getGenAIAddon(): OpenVINOGenAIAddon {
 }
 
 const addon = getGenAIAddon();
+addon.setOpenvinoAddon(ovAddon);
 
-export const { ChatHistory } = addon;
+export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory, Tokenizer } = addon;
 export type ChatHistory = IChatHistory;
-export default addon;
+export type Tokenizer = ITokenizer;
```
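Wiring the core addon in through `setOpenvinoAddon` is what allows GenAI methods to accept real openvino-node tensors; a sketch, assuming `decode()` takes an i64 tensor of ids (the ids themselves are illustrative):

```js
import { addon as ov } from 'openvino-node';
import { Tokenizer } from 'openvino-genai-node';

const tokenizer = new Tokenizer('./model_dir'); // hypothetical model directory
// Token ids are BigInt, so the backing buffer is a BigInt64Array.
const ids = new ov.Tensor(ov.element.i64, [1, 3], new BigInt64Array([1n, 2n, 3n]));
console.log(tokenizer.decode(ids));
```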

src/js/lib/index.ts

Lines changed: 2 additions & 0 deletions
```diff
@@ -40,3 +40,5 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory;
 export { DecodedResults } from "./pipelines/llmPipeline.js";
 export * from "./utils.js";
 export * from "./addon.js";
+export type { TokenizedInputs, EncodeOptions, DecodeOptions } from "./tokenizer.js";
+export type { ChatMessage, ExtraContext, ToolDefinition } from "./chatHistory.js";
```
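With these re-exports, consumers can pull the tokenizer and chat types from the package root, e.g. `import type { TokenizedInputs, EncodeOptions, DecodeOptions } from 'openvino-genai-node'`, instead of reaching into internal module paths.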

src/js/lib/pipelines/llmPipeline.ts

Lines changed: 3 additions & 19 deletions
```diff
@@ -1,30 +1,14 @@
 import util from "node:util";
-import addon, { ChatHistory } from "../addon.js";
+import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js";
 import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js";
+import { Tokenizer } from "../tokenizer.js";
 
 export type ResolveFunction = (arg: { value: string; done: boolean }) => void;
 export type Options = {
   disableStreamer?: boolean;
   max_new_tokens?: number;
 };
 
-interface Tokenizer {
-  /** Applies a chat template to format chat history into a prompt string. */
-  applyChatTemplate(
-    chatHistory: Record<string, any>[] | ChatHistory,
-    addGenerationPrompt: boolean,
-    chatTemplate?: string,
-    tools?: Record<string, any>[],
-    extraContext?: Record<string, any>,
-  ): string;
-  getBosToken(): string;
-  getBosTokenId(): number;
-  getEosToken(): string;
-  getEosTokenId(): number;
-  getPadToken(): string;
-  getPadTokenId(): number;
-}
-
 /** Structure with raw performance metrics for each generation before any statistics are calculated. */
 export type RawMetrics = {
   /** Durations for each generate call in milliseconds. */
@@ -167,7 +151,7 @@
   async init() {
     if (this.isInitialized) throw new Error("LLMPipeline is already initialized");
 
-    this.pipeline = new addon.LLMPipeline();
+    this.pipeline = new LLMPipelineWrap();
 
     const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
     const result = await initPromise(this.modelPath, this.device, this.properties);
```
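With the inline interface gone, `getTokenizer()` is typed by the shared `Tokenizer` from `tokenizer.ts`. One observable behavior change, hedged since this diff does not show it directly: per the description's "Use BigInt for tokenId", the token-id getters are expected to return `BigInt`:

```js
import { LLMPipeline } from 'openvino-genai-node';

const pipe = await LLMPipeline('./model_dir', 'CPU'); // hypothetical model directory
const tokenizer = pipe.getTokenizer();
// Assumption based on "Use BigInt for tokenId" in the description.
console.log(typeof tokenizer.getEosTokenId()); // expected: 'bigint'
```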
