sgl-project
diff --git a/‎_sources/advanced_features/lora.ipynb‎
Lines changed: 154 additions & 167 deletions b/‎_sources/advanced_features/lora.ipynb‎
Lines changed: 154 additions & 167 deletions
diff --git a/‎_sources/advanced_features/separate_reasoning.ipynb‎
Lines changed: 98 additions & 115 deletions b/‎_sources/advanced_features/separate_reasoning.ipynb‎
Lines changed: 98 additions & 115 deletions
diff --git a/‎_sources/advanced_features/speculative_decoding.ipynb‎
Lines changed: 367 additions & 392 deletions b/‎_sources/advanced_features/speculative_decoding.ipynb‎
Lines changed: 367 additions & 392 deletions
diff --git a/‎_sources/advanced_features/structured_outputs.ipynb‎
Lines changed: 138 additions & 131 deletions b/‎_sources/advanced_features/structured_outputs.ipynb‎
Lines changed: 138 additions & 131 deletions
diff --git a/‎_sources/advanced_features/structured_outputs_for_reasoning_models.ipynb‎
Lines changed: 215 additions & 180 deletions b/‎_sources/advanced_features/structured_outputs_for_reasoning_models.ipynb‎
Lines changed: 215 additions & 180 deletions
diff --git a/‎_sources/advanced_features/tool_parser.ipynb‎
Lines changed: 179 additions & 159 deletions b/‎_sources/advanced_features/tool_parser.ipynb‎
Lines changed: 179 additions & 159 deletions
diff --git a/‎_sources/advanced_features/vlm_query.ipynb‎
Lines changed: 268 additions & 268 deletions b/‎_sources/advanced_features/vlm_query.ipynb‎
Lines changed: 268 additions & 268 deletions
diff --git a/‎_sources/basic_usage/native_api.ipynb‎
Lines changed: 155 additions & 161 deletions b/‎_sources/basic_usage/native_api.ipynb‎
Lines changed: 155 additions & 161 deletions
diff --git a/‎_sources/basic_usage/offline_engine_api.ipynb‎
Lines changed: 475 additions & 482 deletions b/‎_sources/basic_usage/offline_engine_api.ipynb‎
Lines changed: 475 additions & 482 deletions
diff --git a/‎_sources/basic_usage/openai_api_completions.ipynb‎
Lines changed: 51 additions & 61 deletions b/‎_sources/basic_usage/openai_api_completions.ipynb‎
Lines changed: 51 additions & 61 deletions
@@ -31,10 +31,10 @@
    "execution_count": 1,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:00:32.007007Z",
-     "iopub.status.busy": "2025-10-06T03:00:32.006876Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.019849Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.019316Z"
+     "iopub.execute_input": "2025-10-06T03:37:49.994905Z",
+     "iopub.status.busy": "2025-10-06T03:37:49.994771Z",
+     "iopub.status.idle": "2025-10-06T03:38:40.631780Z",
+     "shell.execute_reply": "2025-10-06T03:38:40.631244Z"
     }
    },
    "outputs": [
@@ -76,13 +76,7 @@
      "output_type": "stream",
      "text": [
       "/usr/local/lib/python3.10/dist-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.\n",
-      "  import pynvml  # type: ignore[import]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "  import pynvml  # type: ignore[import]\n",
       "/usr/local/lib/python3.10/dist-packages/torch/cuda/__init__.py:63: FutureWarning: The pynvml package is deprecated. Please install nvidia-ml-py instead. If you did not install pynvml directly, please report this to the maintainers of the package that installed pynvml for you.\n",
       "  import pynvml  # type: ignore[import]\n"
      ]
@@ -92,7 +86,7 @@
      "output_type": "stream",
      "text": [
       "`torch_dtype` is deprecated! Use `dtype` instead!\n",
-      "[2025-10-06 03:01:01] `torch_dtype` is deprecated! Use `dtype` instead!\n"
+      "[2025-10-06 03:38:25] `torch_dtype` is deprecated! Use `dtype` instead!\n"
      ]
     },
     {
@@ -102,53 +96,49 @@
       "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n",
       "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n",
       "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n",
-      "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2025-10-06 03:01:02] MOE_RUNNER_BACKEND is not initialized, using triton backend\n"
+      "[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0\n",
+      "[2025-10-06 03:38:29] MOE_RUNNER_BACKEND is not initialized, using triton backend\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\r",
-      "Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]\n"
+      "Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]\n",
+      "\r",
+      "Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  6.19it/s]\n",
+      "\r",
+      "Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  6.18it/s]\n",
+      "\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\r",
-      "Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  4.20it/s]\n",
-      "\r",
-      "Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  4.20it/s]\n",
-      "\n"
+      "  0%|          | 0/3 [00:00<?, ?it/s]\r",
+      "Capturing batches (bs=4 avail_mem=71.53 GB):   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\r",
-      "  0%|          | 0/3 [00:00<?, ?it/s]\r",
-      "Capturing batches (bs=4 avail_mem=77.03 GB):   0%|          | 0/3 [00:00<?, ?it/s]"
+      "Capturing batches (bs=4 avail_mem=71.53 GB):  33%|███▎      | 1/3 [00:00<00:00,  2.84it/s]\r",
+      "Capturing batches (bs=2 avail_mem=71.46 GB):  33%|███▎      | 1/3 [00:00<00:00,  2.84it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\r",
-      "Capturing batches (bs=4 avail_mem=77.03 GB):  33%|███▎      | 1/3 [00:00<00:01,  1.53it/s]\r",
-      "Capturing batches (bs=2 avail_mem=76.97 GB):  33%|███▎      | 1/3 [00:00<00:01,  1.53it/s]\r",
-      "Capturing batches (bs=1 avail_mem=76.96 GB):  33%|███▎      | 1/3 [00:00<00:01,  1.53it/s]\r",
-      "Capturing batches (bs=1 avail_mem=76.96 GB): 100%|██████████| 3/3 [00:00<00:00,  4.17it/s]\n"
+      "Capturing batches (bs=2 avail_mem=71.46 GB):  67%|██████▋   | 2/3 [00:00<00:00,  3.80it/s]\r",
+      "Capturing batches (bs=1 avail_mem=71.46 GB):  67%|██████▋   | 2/3 [00:00<00:00,  3.80it/s]\r",
+      "Capturing batches (bs=1 avail_mem=71.46 GB): 100%|██████████| 3/3 [00:00<00:00,  5.01it/s]\n"
      ]
     },
     {
@@ -167,7 +157,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Server started on http://localhost:31747\n"
+      "Server started on http://localhost:30884\n"
      ]
     }
    ],
@@ -201,17 +191,17 @@
    "execution_count": 2,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:01:13.024173Z",
-     "iopub.status.busy": "2025-10-06T03:01:13.023033Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.307561Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.307079Z"
+     "iopub.execute_input": "2025-10-06T03:38:40.633900Z",
+     "iopub.status.busy": "2025-10-06T03:38:40.633563Z",
+     "iopub.status.idle": "2025-10-06T03:38:40.897940Z",
+     "shell.execute_reply": "2025-10-06T03:38:40.897465Z"
     }
    },
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<strong style='color: #00008B;'>Response: ChatCompletion(id='22b3d25954cb404ca70949629f098753', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Sure, here are three countries and their respective capitals:\\n\\n1. **United States** - Washington, D.C.\\n2. **Canada** - Ottawa\\n3. **Australia** - Canberra', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning_content=None), matched_stop=151645)], created=1759719673, model='qwen/qwen2.5-0.5b-instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=39, prompt_tokens=37, total_tokens=76, completion_tokens_details=None, prompt_tokens_details=None, reasoning_tokens=0), metadata={'weight_version': 'default'})</strong>"
+       "<strong style='color: #00008B;'>Response: ChatCompletion(id='1b89b2c731db480b8715ea001e218847', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Sure, here are three countries and their respective capitals:\\n\\n1. **United States** - Washington, D.C.\\n2. **Canada** - Ottawa\\n3. **Australia** - Canberra', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None, reasoning_content=None), matched_stop=151645)], created=1759721920, model='qwen/qwen2.5-0.5b-instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=39, prompt_tokens=37, total_tokens=76, completion_tokens_details=None, prompt_tokens_details=None, reasoning_tokens=0), metadata={'weight_version': 'default'})</strong>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -401,17 +391,17 @@
    "execution_count": 3,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:01:13.309570Z",
-     "iopub.status.busy": "2025-10-06T03:01:13.309423Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.543919Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.543474Z"
+     "iopub.execute_input": "2025-10-06T03:38:40.899554Z",
+     "iopub.status.busy": "2025-10-06T03:38:40.899407Z",
+     "iopub.status.idle": "2025-10-06T03:38:41.150350Z",
+     "shell.execute_reply": "2025-10-06T03:38:41.149891Z"
     }
    },
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<strong style='color: #00008B;'>The ancient Romans made significant contributions to various fields, including law, philosophy, science, and literature. They were known for their engineering skills, particularly in the construction of monumental architecture like the Colosseum and the Pantheon. The Roman Empire was one of the largest and most powerful empires in history, spanning across Europe, Asia Minor, and North Africa. Their cultural achievements included the development of Roman law and the spread of Christianity. Additionally, they were skilled in agriculture and had a sophisticated system of governance.</strong>"
+       "<strong style='color: #00008B;'>Ancient Rome was a significant civilization that made significant contributions to human history. Some of their major achievements include:<br><br>1. The construction of the Colosseum, one of the largest amphitheaters in the world, which served as a venue for gladiatorial games and other public spectacles.<br><br>2. The development of Roman law, which established the principles of justice and legal procedures that are still used today.<br><br>3. The invention of the horse-drawn carriage, which allowed for faster transportation and trade.<br><br>4. The construction of aqueducts that provided water to cities and towns, improving living conditions and economic activity.<br><br>5. The</strong>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -460,18 +450,18 @@
    "execution_count": 4,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:01:13.545307Z",
-     "iopub.status.busy": "2025-10-06T03:01:13.545169Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.655547Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.654789Z"
+     "iopub.execute_input": "2025-10-06T03:38:41.151833Z",
+     "iopub.status.busy": "2025-10-06T03:38:41.151685Z",
+     "iopub.status.idle": "2025-10-06T03:38:41.252737Z",
+     "shell.execute_reply": "2025-10-06T03:38:41.252288Z"
     }
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Yes, \"test\" is a code word for a survey or a sample in various contexts. It's common to use \"test\" to refer to a small group of people or a particular population whose opinions or behaviors will be surveyed in a testing or evaluation activity."
+      "Yes, I am Qwen, a language model created by Alibaba Cloud. My purpose is to assist you with any questions or tasks you may have. If you have any questions or would like to talk about a specific topic, feel free to ask!"
      ]
     }
    ],
@@ -501,17 +491,17 @@
    "execution_count": 5,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:01:13.657280Z",
-     "iopub.status.busy": "2025-10-06T03:01:13.656894Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.822803Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.822237Z"
+     "iopub.execute_input": "2025-10-06T03:38:41.254144Z",
+     "iopub.status.busy": "2025-10-06T03:38:41.254004Z",
+     "iopub.status.idle": "2025-10-06T03:38:41.404226Z",
+     "shell.execute_reply": "2025-10-06T03:38:41.403853Z"
     }
    },
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<strong style='color: #00008B;'>Response: Completion(id='0a9bef63fc134f77b60a8501ba5d912d', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' 1. United States - Washington D.C.\\n2. Canada - Ottawa\\n3. France - Paris\\n4. Germany - Berlin\\n5. Japan - Tokyo\\n6. Italy - Rome\\n7. Spain - Madrid\\n8. United Kingdom - London\\n9. Australia - Canberra\\n10. New Zealand', matched_stop=None)], created=1759719673, model='qwen/qwen2.5-0.5b-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=64, prompt_tokens=8, total_tokens=72, completion_tokens_details=None, prompt_tokens_details=None, reasoning_tokens=0), metadata={'weight_version': 'default'})</strong>"
+       "<strong style='color: #00008B;'>Response: Completion(id='3be3295d77d44db9a1843f154ee197a7', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' 1. United States - Washington D.C.\\n2. Canada - Ottawa\\n3. France - Paris\\n4. Germany - Berlin\\n5. Japan - Tokyo\\n6. Italy - Rome\\n7. Spain - Madrid\\n8. United Kingdom - London\\n9. Australia - Canberra\\n10. New Zealand', matched_stop=None)], created=1759721921, model='qwen/qwen2.5-0.5b-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=64, prompt_tokens=8, total_tokens=72, completion_tokens_details=None, prompt_tokens_details=None, reasoning_tokens=0), metadata={'weight_version': 'default'})</strong>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -550,17 +540,17 @@
    "execution_count": 6,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:01:13.824422Z",
-     "iopub.status.busy": "2025-10-06T03:01:13.824267Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.902031Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.901479Z"
+     "iopub.execute_input": "2025-10-06T03:38:41.405600Z",
+     "iopub.status.busy": "2025-10-06T03:38:41.405462Z",
+     "iopub.status.idle": "2025-10-06T03:38:41.558349Z",
+     "shell.execute_reply": "2025-10-06T03:38:41.557974Z"
     }
    },
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<strong style='color: #00008B;'>Response: Completion(id='b96d15fb328c43c8afb3db6bf8ab7023', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' As an AI language model, I do not have personal experiences or emotions, but I can generate a fictional story based on common themes and events that people might encounter in space travel.', matched_stop='\\n\\n')], created=1759719673, model='qwen/qwen2.5-0.5b-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=36, prompt_tokens=9, total_tokens=45, completion_tokens_details=None, prompt_tokens_details=None, reasoning_tokens=0), metadata={'weight_version': 'default'})</strong>"
+       "<strong style='color: #00008B;'>Response: Completion(id='8061143f030d483183cae76728e120c0', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Once upon a time, there was a space explorer named Dr. Amelia Hart. She had been on many missions to explore the stars and planets, but never before had she been in space with anyone.', matched_stop='\\n\\n')], created=1759721921, model='qwen/qwen2.5-0.5b-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=40, prompt_tokens=9, total_tokens=49, completion_tokens_details=None, prompt_tokens_details=None, reasoning_tokens=0), metadata={'weight_version': 'default'})</strong>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -601,10 +591,10 @@
    "execution_count": 7,
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2025-10-06T03:01:13.903616Z",
-     "iopub.status.busy": "2025-10-06T03:01:13.903457Z",
-     "iopub.status.idle": "2025-10-06T03:01:13.943927Z",
-     "shell.execute_reply": "2025-10-06T03:01:13.943243Z"
+     "iopub.execute_input": "2025-10-06T03:38:41.559683Z",
+     "iopub.status.busy": "2025-10-06T03:38:41.559549Z",
+     "iopub.status.idle": "2025-10-06T03:38:41.618507Z",
+     "shell.execute_reply": "2025-10-06T03:38:41.617987Z"
     }
    },
    "outputs": [],