Add noc reasoning tutorial #1242
base: main
Changes from all commits: 529f1d5, c3a7a65, d11100c, 897d390, 2a19c5f
```diff
@@ -49,3 +49,8 @@ AGENTS.md
 .codex

 .idea
+site/
+site/
+site/
+site/
+site/
```
Contributor commented on lines +52 to +56 and attached a suggested change.
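The duplicated entries the reviewer flags are easy to strip mechanically. As a rough sketch (assuming a POSIX shell with awk available), an order-preserving de-duplication of ignore entries looks like:

```shell
# Print each line only the first time it appears, preserving order.
# 'seen' is an awk associative array; the post-increment makes the
# condition true only on a line's first occurrence.
printf '.codex\n.idea\nsite/\nsite/\nsite/\n' | awk '!seen[$0]++'
```

On a real repo the same filter could be applied in place, e.g. `awk '!seen[$0]++' .gitignore > .gitignore.tmp && mv .gitignore.tmp .gitignore`.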
New file (214 added lines):

```
@@ -0,0 +1,214 @@
# Byte-compiled / optimized / DLL files
```

Collaborator: This is 214 lines and looks like the full GitHub Python gitignore template. Was this intentionally included for the recipe, or did it get auto-generated? I would think this could get entirely removed.
```
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# Pipeline outputs and artifacts (do not push to repo)
outputs/
training/
data/categorized_incidents.csv
data/filtered_file.csv
data/finalized_dataset.csv
```
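Git's ignore patterns are close to shell globbing, so the less obvious entries above can be sanity-checked with Python's `fnmatch` (only an approximation of full gitignore semantics, so this is a sketch, not a faithful reimplementation):

```python
from fnmatch import fnmatch

# '*.py[codz]' uses a character class: it matches compiled/optimized
# artifacts such as foo.pyc, foo.pyo, foo.pyd, and foo.pyz,
# but not plain foo.py sources.
assert fnmatch("module.pyc", "*.py[codz]")
assert fnmatch("module.pyd", "*.py[codz]")
assert not fnmatch("module.py", "*.py[codz]")

# Simple suffix patterns like '*.so' are ordinary globs.
assert fnmatch("libfast.so", "*.so")
print("all patterns behave as expected")
```

Note that directory-only patterns (trailing `/`) and anchored patterns (leading `/`, as in `/site`) carry extra gitignore-specific meaning that `fnmatch` does not model.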
New file (10 added lines):

```
@@ -0,0 +1,10 @@
[download]
qwen2.5-14=/workspace/models/Qwen2.5-14B-Instruct
qwen2.5-32=/workspace/models/Qwen2.5-32B-Instruct
gpt-oss-120b=/workspace/models/gpt-oss-120b
nemotron-49b-1.5=/workspace/models/Llama-3_3-Nemotron-Super-49B-v1_5

[data_path]
original_data_path=data/anonymized-Incidents_Last_6_Months.csv
incident_json_data=outputs/input_incident.jsonl
```
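The code that consumes this file is not part of the diff, so the exact reader is an assumption, but a config in this INI shape is typically loaded with Python's standard `configparser`. A minimal sketch:

```python
import configparser

# Hypothetical loader for the [download]/[data_path] config above;
# the real pipeline's reader is not shown in this PR.
SAMPLE = """
[download]
gpt-oss-120b=/workspace/models/gpt-oss-120b
nemotron-49b-1.5=/workspace/models/Llama-3_3-Nemotron-Super-49B-v1_5

[data_path]
original_data_path=data/anonymized-Incidents_Last_6_Months.csv
"""

parser = configparser.ConfigParser()
parser.read_string(SAMPLE)  # read(path) would be used for a file on disk

model_dir = parser["download"]["gpt-oss-120b"]
print(model_dir)  # /workspace/models/gpt-oss-120b
```

One wrinkle worth knowing: `configparser` lowercases option names by default, which is harmless here since all keys are already lowercase.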
New file (150 added lines):

```yaml
processors_to_run: all

output_path: ???
# prompt_config: null
preprocessed_dataset_files: null
input_files: null

# --- Data Keys ---
# These keys MUST match the output of your preprocessing script.
input_key: "background"
output_key: "response"

# --- SFT Formatting ---
# Define the tokenizer and the final chat format for the model.
tokenizer: "Qwen/Qwen3-32B"  # EDIT THIS or override via CLI

# This uses a separate YAML file to define the chat template.
# This makes the configuration cleaner and more reusable.
prompt_config: "/workspace/data/prompt_incident.yaml"  # EDIT THIS or override via CLI

# -----------------
# --- General Settings ---
# -----------------
do_shuffle: false
deduplicate: true
exclude_optional_keys: true
random_seed: 42
num_output_samples: null
add_correct: true
add_incorrect: false
add_unlabeled: true  # Set to true as requested

# -----------------
# --- Quality Control Filters ---
# -----------------
# Most filters are disabled because the data is text-based reasoning, not math or code.
# This prevents the pipeline from incorrectly discarding valid data.

contamination_file: null

filters:
  drop_multi_boxed: false
  remove_contaminated: false  # can be enabled if you have a contamination file
  majority_filter: false
  trim_solutions: false  # Your data doesn't use the \boxed{} syntax
  trim_prefix: false
  drop_incorrect_arithmetic: false
  split_arithmetic: false
  remove_len_outlier_problems: false  # Keep all data regardless of length
  remove_len_outlier_solutions: false
  code_text_filter: null
  remove_code_errors: false
  remove_verification_code: false
  remove_matplotlib: false
  remove_no_code: false
  remove_no_think_tags: false  # Enabled, as requested

# ================================================================================= #
#          Processor Pipeline (Usually does not need to be changed)                 #
# ================================================================================= #
processors:
  - _target_: nemo_skills.training.data_preparation_utils.preprocessing.ReadData
    input_files: ${input_files}  # This line ensures the processor gets the input file path
    input_key: ${input_key}
    output_key: ${output_key}
    add_unlabeled: ${add_unlabeled}
    deduplicate: ${deduplicate}
    keys_to_keep:
      - "expected_answer"
      - "incident_identifier"
      - "incident_classification"
      - "urgency_level"
      - "geographical_territory"
      - "incident_subtype"
      - "service_domain"
      - "equipment_provider"
      - "operational_zone"
      - "affected_site"
      - "incident_summary"
      - "detection_timestamp"
      - "escalation_date"
      - "responsible_team"
      - "fault_category"
      - "action_chronicle"
      - "resolution_summary"
      - "resolution_method"
      - "problem_code_reasoning_process"

  - _target_: nemo_skills.training.data_preparation_utils.merge_processor.MergeProcessor
    _recursive_: false
    processor_configs:
      - _target_: nemo_skills.training.data_preparation_utils.filters.RemoveContaminated
        should_run: ${filters.remove_contaminated}
        contamination_file: ${contamination_file}

      - _target_: nemo_skills.training.data_preparation_utils.filters.DropIfRegexMatch  # removing errors
        should_run: ${filters.remove_code_errors}
        text_key: ${output_key}
        regex_patterns:
          - 'Traceback (most recent call last)'
          - '<output cut>'
          - 'Timed out'
          - 'SyntaxError'
        test_cases:
          - { input: { generation: "My solution:\n---Traceback (most recent call last)---\nSomething else" }, output: null }
          - { input: { generation: "My solution:\nSome long output<output cut>\nSomething else" }, output: null }
          - { input: { generation: "My solution:\nTimed out\nSomething else" }, output: null }
          - { input: { generation: "My solution:\n[0;31mSyntaxError\u001b\nSomething else" }, output: null }
          - { input: { generation: "My solution, no errors" }, output: { generation: "My solution, no errors" } }

      - _target_: nemo_skills.training.data_preparation_utils.filters.DropIfRegexNotMatch  # filtering out tool calling
        should_run: ${filters.remove_no_code}
        text_key: ${output_key}
        regex_patterns:
          - '<tool_call>'
          - '</tool_call>'
        test_cases:
          - { input: { generation: "My solution:\n---<tool_call>---\nSomething else" }, output: null }
          - { input: { generation: "My solution:\ncode</tool_call>\nSomething else" }, output: null }
          - { input: { generation: "<tool_call>code</tool_call>" }, output: { generation: "<tool_call>code</tool_call>" } }

      - _target_: nemo_skills.training.data_preparation_utils.filters.DropIfRegexNotMatch  # filtering out if no think tags
        should_run: ${filters.remove_no_think_tags}
        text_key: ${output_key}
        regex_patterns:
          - '</think>'
        test_cases:
          - { input: { generation: "My solution:\n---</think>---\nSomething else" }, output: { generation: "My solution:\n---</think>---\nSomething else" } }
          - { input: { generation: "<think>My solution:\n\nSomething else" }, output: null }
          - { input: { generation: "<think>thinking</think>summary" }, output: { generation: "<think>thinking</think>summary" } }

  - _target_: nemo_skills.training.data_preparation_utils.preprocessing.GroupSamples
    group_key: ${input_key}

  - _target_: nemo_skills.training.data_preparation_utils.preprocessing.ShuffleAndDownsampleData
    num_samples: ${num_output_samples}
    random_seed: ${random_seed}
    do_shuffle: ${do_shuffle}

  - _target_: nemo_skills.training.data_preparation_utils.preprocessing.WriteFinalSftManifest
    output_manifest_file: ${output_path}
    prompt_config: ${prompt_config}
    tokenizer: ${tokenizer}
    input_key: ${input_key}
    output_key: ${output_key}
    exclude_optional_keys: ${exclude_optional_keys}
```
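The `DropIfRegexNotMatch` behaviour exercised by the `test_cases` above can be illustrated in plain Python. This is a sketch of the semantics implied by those test cases (a sample is kept only if every pattern matches its text), not the actual nemo_skills implementation:

```python
import re

def keep_sample(sample: dict, patterns: list, text_key: str = "generation") -> bool:
    """Sketch of DropIfRegexNotMatch semantics: keep a sample only if
    every regex pattern is found somewhere in sample[text_key]."""
    text = sample[text_key]
    return all(re.search(p, text) for p in patterns)

samples = [
    {"generation": "<think>thinking</think>summary"},        # has closing tag -> kept
    {"generation": "<think>My solution:\n\nSomething else"}, # no closing tag -> dropped
]
kept = [s for s in samples if keep_sample(s, [r"</think>"])]
print(len(kept))  # 1
```

This matches the third filter in the pipeline: with `remove_no_think_tags` enabled, any response missing a `</think>` tag would be dropped from the SFT manifest.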
Contributor: Looks like site/ got added 5 times here — just needs the one entry.