
Commit 7d7ace9

Aydin-ab authored and Future-Outlier committed

[docs] Add gpt oss deployment example (ray-project#56400)
Signed-off-by: Future-Outlier <eric901201@gmail.com>

1 parent: 713f85f

24 files changed: 1,063 additions & 79 deletions


doc/source/serve/examples.yml (8 additions, 0 deletions)

@@ -122,6 +122,14 @@ examples:
       - natural language processing
     link: tutorials/deployment-serve-llm/hybrid-reasoning-llm/README
     related_technology: llm applications
+  - title: Deploy gpt-oss
+    skill_level: beginner
+    use_cases:
+      - generative ai
+      - large language models
+      - natural language processing
+    link: tutorials/deployment-serve-llm/gpt-oss/README
+    related_technology: llm applications
   - title: Serve a Chatbot with Request and Response Streaming
     skill_level: intermediate
     use_cases:

doc/source/serve/llm/index.md (2 additions, 1 deletion)

@@ -67,4 +67,5 @@ Cache-aware request routing <prefix-aware-request-router>
 - {doc}`Deploy a large-sized LLM <../tutorials/deployment-serve-llm/large-size-llm/README>`
 - {doc}`Deploy a vision LLM <../tutorials/deployment-serve-llm/vision-llm/README>`
 - {doc}`Deploy a reasoning LLM <../tutorials/deployment-serve-llm/reasoning-llm/README>`
-- {doc}`Deploy a hybrid reasoning LLM <../tutorials/deployment-serve-llm/hybrid-reasoning-llm/README>`
+- {doc}`Deploy a hybrid reasoning LLM <../tutorials/deployment-serve-llm/hybrid-reasoning-llm/README>`
+- {doc}`Deploy gpt-oss <../tutorials/deployment-serve-llm/gpt-oss/README>`

doc/source/serve/tutorials/deployment-serve-llm/README.ipynb (6 additions, 1 deletion)

@@ -39,7 +39,12 @@
     "---\n",
     "\n",
     "**[Deploy a hybrid reasoning LLM](https://docs.ray.io/en/latest/serve/tutorials/deployment-serve-llm/hybrid-reasoning-llm/README.html)** \n",
-    "Deploy models that can switch between reasoning and non-reasoning modes for flexible usage, such as Qwen-3."
+    "Deploy models that can switch between reasoning and non-reasoning modes for flexible usage, such as Qwen-3.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "**[Deploy gpt-oss](https://docs.ray.io/en/latest/ray-overview/examples/deployment-serve-llm/gpt-oss/README.html)** \n",
+    "Deploy gpt-oss reasoning models for production-scale workloads, covering both lower-latency (`gpt-oss-20b`) and high-reasoning (`gpt-oss-120b`) use cases."
   ]
  }
 ],

doc/source/serve/tutorials/deployment-serve-llm/README.md (5 additions, 0 deletions)

@@ -39,3 +39,8 @@ Deploy models with reasoning capabilities designed for long-context tasks, codin
 **[Deploy a hybrid reasoning LLM](https://docs.ray.io/en/latest/serve/tutorials/deployment-serve-llm/hybrid-reasoning-llm/README.html)**
 Deploy models that can switch between reasoning and non-reasoning modes for flexible usage, such as Qwen-3.
+
+---
+
+**[Deploy gpt-oss](https://docs.ray.io/en/latest/ray-overview/examples/deployment-serve-llm/gpt-oss/README.html)**
+Deploy gpt-oss reasoning models for production-scale workloads, covering both lower-latency (`gpt-oss-20b`) and high-reasoning (`gpt-oss-120b`) use cases.

doc/source/serve/tutorials/deployment-serve-llm/ci/nb2py.py (11 additions, 1 deletion)

@@ -42,7 +42,17 @@ def convert_notebook(
         else:
             # Detect any IPython '!' shell commands in code lines
             has_bang = any(line.lstrip().startswith("!") for line in lines)
-            if has_bang:
+            # Lines that start with "serve run", "serve shutdown", "curl",
+            # or "anyscale service" commands
+            to_ignore_cmd = (
+                "serve run",
+                "serve shutdown",
+                "curl",
+                "anyscale service",
+            )
+            has_ignored_start = any(
+                line.lstrip().startswith(to_ignore_cmd) for line in lines
+            )
+            if has_bang or has_ignored_start:
                 if ignore_cmds:
                     continue
                 out.write("import subprocess\n")
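The check added to nb2py.py leans on `str.startswith` accepting a tuple of prefixes, so a single call covers all four commands. A minimal standalone sketch of that rule (the helper name and signature are illustrative, not the converter's actual API, and the real script operates on notebook JSON rather than bare line lists):

```python
# Illustrative sketch of the cell-filtering rule from nb2py.py.
TO_IGNORE_CMD = (
    "serve run",
    "serve shutdown",
    "curl",
    "anyscale service",
)

def should_skip_cell(lines, ignore_cmds=True):
    """Return True if a code cell would be dropped from the generated script."""
    # IPython '!' shell escapes
    has_bang = any(line.lstrip().startswith("!") for line in lines)
    # str.startswith accepts a tuple, so one call checks every prefix
    has_ignored_start = any(line.lstrip().startswith(TO_IGNORE_CMD) for line in lines)
    return ignore_cmds and (has_bang or has_ignored_start)

assert should_skip_cell(["serve run main:app"])
assert should_skip_cell(["  !pip install vllm"])
assert not should_skip_cell(["import ray"])
```

With `--ignore-cmds` set, such cells are skipped entirely; otherwise the converter rewrites them as `subprocess` calls, as the surrounding diff shows.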

doc/source/serve/tutorials/deployment-serve-llm/ci/tests.sh (2 additions, 1 deletion)

@@ -12,7 +12,8 @@ for nb in \
     "large-size-llm/notebook" \
     "vision-llm/notebook" \
     "reasoning-llm/notebook" \
-    "hybrid-reasoning-llm/notebook"
+    "hybrid-reasoning-llm/notebook" \
+    "gpt-oss/notebook"
 do
     python ci/nb2py.py "${nb}.ipynb" "${nb}.py" --ignore-cmds
     python "${nb}.py"
New file (8 additions, 0 deletions)

@@ -0,0 +1,8 @@
+FROM anyscale/ray:2.49.0-slim-py312-cu128
+
+# C compiler for Triton's runtime build step (vLLM V1 engine)
+# https://github.com/vllm-project/vllm/issues/2997
+RUN sudo apt-get update && \
+    sudo apt-get install -y --no-install-recommends build-essential
+
+RUN pip install vllm==0.10.1
