From 0352e834de34fe4248b2a5ec3bc0901c0a566d1a Mon Sep 17 00:00:00 2001
From: yuesu <yuesu@microsoft.com>
Date: Tue, 14 Apr 2026 11:25:20 +0800
Subject: [PATCH 1/2] Add NPU-OV stage to Modelkit E2E pipeline via shared jobs template

---
 .pipelines/Modelkit E2E Test.yml       | 222 +++------------------
 .pipelines/templates/e2e-eval-jobs.yml | 258 +++++++++++++++++++++++++
 2 files changed, 290 insertions(+), 190 deletions(-)
 create mode 100644 .pipelines/templates/e2e-eval-jobs.yml

diff --git a/.pipelines/Modelkit E2E Test.yml b/.pipelines/Modelkit E2E Test.yml
index a42a84e0f..1cd0b5103 100644
--- a/.pipelines/Modelkit E2E Test.yml	
+++ b/.pipelines/Modelkit E2E Test.yml	
@@ -1,200 +1,42 @@
 trigger: none
 
+resources:
+  repositories:
+    - repository: ModelKitArtifacts  # alias used by 'checkout: ModelKitArtifacts' in templates/e2e-eval-jobs.yml
+      type: github
+      endpoint: github.com_yuesu_microsoft  # service connection name used to reach GitHub
+      name: gim-home/ModelKitArtifacts
+      ref: main
+
 parameters:
   - name: evalDate
-    displayName: 'Eval date (leave empty for today, e.g. 2026-04-01)'
+    displayName: 'Eval date (auto = today, e.g. 2026-04-01)'
     type: string
-    default: ''
+    default: 'auto'  # the jobs template treats 'auto' (or empty) as today's date
   - name: continueRun
     displayName: 'Skip already-evaluated models (--continue)'
     type: boolean
     default: true
 
-variables:
-  evalOutputBase: 'c:/eval_results'
-
-jobs:
-  - job: Prepare
-    displayName: 'Prepare Eval Matrix'
-    pool:
-      name: modelkit-selfhost-pool
-      demands:
-        - Agent.Name -equals NPU-QNN
-
-    steps:
-      - checkout: self
-        clean: false
-        fetchDepth: 1
-
-      - powershell: |
-          $uvBin = "$env:USERPROFILE\.local\bin"
-          if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
-            Invoke-RestMethod https://astral.sh/uv/0.10.12/install.ps1 | Invoke-Expression
-            $env:PATH = "$uvBin;$env:PATH"
-          }
-          uv python install 3.10
-          Remove-Item -Recurse -Force "$(Build.SourcesDirectory)\.venv" -ErrorAction SilentlyContinue
-          uv venv $(Build.SourcesDirectory)\.venv --python 3.10
-          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
-          Write-Host "##vso[task.prependpath]$uvBin"
-          Write-Host "##vso[task.prependpath]$venvDir"
-        displayName: 'Install uv 0.10.12 and Python'
-
-      - script: python --version
-        displayName: 'Check Python version'
-
-      - task: PipAuthenticate@1
-        inputs:
-          artifactFeeds: 'windows.ai.toolkit/Modelkit'
-        displayName: 'Authenticate pip with Azure Artifacts'
-
-      - script: uv pip install -e .[dev]
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Install dependencies'
-
-      - powershell: |
-          $evalDate = '${{ parameters.evalDate }}'
-          if (-not $evalDate) { $evalDate = Get-Date -Format 'yyyy-MM-dd' }
-          $dir = "$(evalOutputBase)/$evalDate"
-          Write-Host "##vso[task.setvariable variable=EVAL_DIR;isOutput=true]$dir"
-          Write-Host "Eval output directory: $dir"
-        name: set_output_dir
-        displayName: 'Set eval output directory'
-
-      - powershell: |
-          $args = @(
-              "run", "python", "scripts/e2e_eval/run_eval.py",
-              "--list-json", "temp/model_list.json",
-              "--device", "npu"
-          )
-          if ('${{ parameters.continueRun }}' -eq 'True') {
-              $args += @("--continue", "--output-dir", "$(set_output_dir.EVAL_DIR)")
-          }
-          & uv @args
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Generate model list'
-
-      - powershell: |
-          $models = Get-Content "$(Build.SourcesDirectory)/temp/model_list.json" | ConvertFrom-Json
-          $total = $models.Count
-          if ($total -eq 0) {
-              Write-Host "All models already evaluated — nothing to run"
-              Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]{}"
-              Write-Host "##vso[task.setvariable variable=skipEval;isOutput=true]true"
-              return
-          }
-
-          $matrix = @{}
-          for ($i = 0; $i -lt $total; $i++) {
-              $m = $models[$i]
-              $slug = (($m.hf_id + '_' + $m.task) -replace '[^A-Za-z0-9]', '_')
-              $key = $slug
-              $suffix = 2
-              while ($matrix.ContainsKey($key)) {
-                  $key = "${slug}_${suffix}"
-                  $suffix++
-              }
-              $matrix[$key] = @{
-                  hf_id = [string]$m.hf_id
-                  hf_task = [string]$m.task
-                  priority = [string]$m.priority
-                  model_type = [string]$m.model_type
-                  model_group = [string]$m.group
-              }
-          }
-
-          $json = $matrix | ConvertTo-Json -Compress -Depth 5
-          Write-Host "Prepared matrix for $total models"
-          Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]$json"
-        name: set_matrix
-        displayName: 'Create matrix variables'
-
-  - job: EvalModel
-    displayName: 'Eval'
-    dependsOn: Prepare
-    condition: and(succeeded(), ne(dependencies.Prepare.outputs['set_matrix.skipEval'], 'true'))
-    timeoutInMinutes: 90
-    cancelTimeoutInMinutes: 2
-    pool:
-      name: modelkit-selfhost-pool
-      demands:
-        - Agent.Name -equals NPU-QNN
-    variables:
-      EVAL_DIR: $[ dependencies.Prepare.outputs['set_output_dir.EVAL_DIR'] ]
-    strategy:
-      maxParallel: 1
-      matrix: $[ dependencies.Prepare.outputs['set_matrix.modelMatrix'] ]
-
-    steps:
-      - checkout: none
-
-      - powershell: |
-          $uvBin = "$env:USERPROFILE\.local\bin"
-          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
-          Write-Host "##vso[task.prependpath]$uvBin"
-          Write-Host "##vso[task.prependpath]$venvDir"
-        displayName: 'Activate Python environment'
-
-      - powershell: |
-          Write-Host "Model: $(hf_id) / $(hf_task)"
-          Write-Host "Priority: $(priority)"
-          Write-Host "Output: $(EVAL_DIR)"
-
-          $uvArgs = @(
-              "run", "--no-sync", "python", "scripts/e2e_eval/run_eval.py",
-              "--hf-model", "$(hf_id)",
-              "--output-dir", "$(EVAL_DIR)",
-              "--device", "npu",
-              "--continue",
-              "--verbose",
-              "--timeout", "1800",
-              "--no-report",
-              "--clean-cache"
-          )
-          if ("$(hf_task)") {
-              $uvArgs += @("--task", "$(hf_task)")
-          }
-
-          & uv @uvArgs
-          $evalExit = $LASTEXITCODE
-          if ($evalExit -ne 0) {
-              Write-Warning "Model eval exited with code $evalExit for $(hf_id) / $(hf_task) (model failure — non-blocking)"
-          }
-          exit 0
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Run eval for current model'
-
-  - job: Report
-    displayName: 'Generate Eval Report'
-    dependsOn:
-      - Prepare
-      - EvalModel
-    condition: always()
-    pool:
-      name: modelkit-selfhost-pool
-      demands:
-        - Agent.Name -equals NPU-QNN
-    variables:
-      EVAL_DIR: $[ dependencies.Prepare.outputs['set_output_dir.EVAL_DIR'] ]
-
-    steps:
-      - checkout: none
-
-      - powershell: |
-          $uvBin = "$env:USERPROFILE\.local\bin"
-          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
-          Write-Host "##vso[task.prependpath]$uvBin"
-          Write-Host "##vso[task.prependpath]$venvDir"
-        displayName: 'Activate Python environment'
-
-      - script: >
-          uv run --no-sync python scripts/e2e_eval/generate_report.py
-          --input-dir $(EVAL_DIR)
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Generate evaluation report'
-
-      - task: PublishPipelineArtifact@1
-        inputs:
-          targetPath: $(EVAL_DIR)
-          artifactName: EvalReport
-        displayName: 'Publish eval results as artifact'
+stages:  # one stage per NPU agent; both instantiate the same jobs template
+  - stage: NPU_QNN
+    displayName: 'E2E Eval — NPU-QNN'
+    jobs:
+      - template: templates/e2e-eval-jobs.yml
+        parameters:
+          agentName: NPU-QNN  # matched against Agent.Name in the template's pool demands
+          agentSuffix: qnn  # suffix for job names, output directory and artifact name
+          evalDate: ${{ parameters.evalDate }}
+          continueRun: ${{ parameters.continueRun }}
+
+  - stage: NPU_OV
+    displayName: 'E2E Eval — NPU-OV'
+    dependsOn: []  # empty list: this stage does not wait for NPU_QNN
+    jobs:
+      - template: templates/e2e-eval-jobs.yml
+        parameters:  # modelTimeout below overrides the template default of 1800
+          agentName: NPU-OV
+          agentSuffix: ov
+          evalDate: ${{ parameters.evalDate }}
+          continueRun: ${{ parameters.continueRun }}
+          modelTimeout: 3600
diff --git a/.pipelines/templates/e2e-eval-jobs.yml b/.pipelines/templates/e2e-eval-jobs.yml
new file mode 100644
index 000000000..9424d118a
--- /dev/null
+++ b/.pipelines/templates/e2e-eval-jobs.yml
@@ -0,0 +1,258 @@
+parameters:
+  - name: agentName  # value required of Agent.Name in every job's pool demands
+    type: string
+  - name: agentSuffix  # appended to job names, the eval output path and the artifact name
+    type: string
+  - name: evalOutputBase  # root folder; results go to <base>/<date>/<agentSuffix>
+    type: string
+    default: 'c:/eval_results'
+  - name: evalDate  # empty or 'auto' resolves to today's date (yyyy-MM-dd)
+    type: string
+    default: ''
+  - name: continueRun  # true adds --continue/--output-dir when the model list is generated
+    type: boolean
+    default: true
+  - name: modelTimeout  # forwarded to run_eval.py as --timeout (presumably seconds; confirm)
+    type: number
+    default: 1800
+
+jobs:
+  - job: Prepare_${{ parameters.agentSuffix }}  # sets up Python, copies rule zips, emits the per-model matrix
+    displayName: 'Prepare Eval Matrix (${{ parameters.agentSuffix }})'
+    pool:
+      name: modelkit-selfhost-pool
+      demands:
+        - Agent.Name -equals ${{ parameters.agentName }}  # pin the job to one named agent
+
+    steps:
+      - checkout: self
+        clean: false
+        fetchDepth: 1
+        path: s
+
+      - checkout: ModelKitArtifacts
+        fetchDepth: 1
+        lfs: true  # the copy step below rejects zips that look like unresolved LFS pointers
+        path: artifacts
+
+      - powershell: |
+          Write-Host "Agent.BuildDirectory : $(Agent.BuildDirectory)"
+          Write-Host "Build.SourcesDirectory: $(Build.SourcesDirectory)"
+          $repoDir = "$(Agent.BuildDirectory)/artifacts"
+          if (-not (Test-Path "$repoDir/op_check_results/rules")) {
+            $repoDir = "$(Agent.BuildDirectory)/ModelKitArtifacts"  # fallback checkout location
+          }
+          $src = "$repoDir/op_check_results/rules"
+          $dst = "$(Build.SourcesDirectory)/src/winml/modelkit/analyze/rules/runtime_check_rules"
+          if (Test-Path $src) {
+            New-Item -ItemType Directory -Path $dst -Force | Out-Null
+            $zips = Copy-Item "$src/*.zip" $dst -Force -PassThru
+            Write-Host "Copied $($zips.Count) rule zips to $dst"
+            # Verify files are real data, not LFS pointers (~130 bytes)
+            $bad = $zips | Where-Object { $_.Length -lt 1024 }
+            if ($bad) {  # list offenders BEFORE Write-Error: it ends the script when ErrorActionPreference is Stop
+              $bad | ForEach-Object { Write-Host "  $($_.Name): $($_.Length) bytes" }
+              Write-Error "The rule zips listed above are suspiciously small (likely unresolved LFS pointers)"
+              exit 1
+            }
+            $zips | ForEach-Object { Write-Host "  $($_.Name): $([math]::Round($_.Length / 1KB, 1)) KB" }
+          } else {  # dump the checkout contents first, then fail; the listing must not raise its own error
+            Write-Host "Contents of $repoDir :"
+            Get-ChildItem $repoDir -Recurse -Depth 2 -ErrorAction SilentlyContinue | ForEach-Object { Write-Host "  $($_.FullName)" }
+            Write-Error "Rules source not found at: $src"
+            exit 1
+          }
+        displayName: 'Copy runtime check rules from ModelKitArtifacts'
+
+      - powershell: |
+          $uvBin = "$env:USERPROFILE\.local\bin"
+          if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {  # install only when no uv is on PATH
+            Invoke-RestMethod https://astral.sh/uv/0.10.12/install.ps1 | Invoke-Expression
+            $env:PATH = "$uvBin;$env:PATH"
+          }
+          uv python install 3.10
+          Remove-Item -Recurse -Force "$(Build.SourcesDirectory)\.venv" -ErrorAction SilentlyContinue  # always start from a fresh venv
+          uv venv "$(Build.SourcesDirectory)\.venv" --python 3.10
+          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
+          Write-Host "##vso[task.prependpath]$uvBin"  # make uv visible to the following steps
+          Write-Host "##vso[task.prependpath]$venvDir"  # make the venv's executables visible to the following steps
+        displayName: 'Install uv 0.10.12 and Python'
+
+      - script: python --version
+        displayName: 'Check Python version'
+
+      - task: PipAuthenticate@1
+        inputs:
+          artifactFeeds: 'windows.ai.toolkit/Modelkit'
+        displayName: 'Authenticate pip with Azure Artifacts'
+
+      - script: uv pip install -e .[dev]
+        workingDirectory: $(Build.SourcesDirectory)
+        displayName: 'Install dependencies'
+
+      - powershell: |
+          $evalDate = '${{ parameters.evalDate }}'
+          if (-not $evalDate -or $evalDate -eq 'auto') { $evalDate = Get-Date -Format 'yyyy-MM-dd' }
+          $dir = "${{ parameters.evalOutputBase }}/$evalDate/${{ parameters.agentSuffix }}"  # <base>/<date>/<suffix>
+          Write-Host "##vso[task.setvariable variable=EVAL_DIR;isOutput=true]$dir"  # read by later jobs as set_output_dir.EVAL_DIR
+          Write-Host "Eval output directory: $dir"
+        name: set_output_dir
+        displayName: 'Set eval output directory'
+
+      - powershell: |
+          $uvArgs = @(
+              "run", "python", "scripts/e2e_eval/run_eval.py",
+              "--list-json", "temp/model_list.json",
+              "--device", "npu"
+          )
+          if ('${{ parameters.continueRun }}' -eq 'True') {  # boolean parameter is rendered as text
+              $uvArgs += @("--continue", "--output-dir", "$(set_output_dir.EVAL_DIR)")
+          }
+          & uv @uvArgs
+        workingDirectory: $(Build.SourcesDirectory)
+        displayName: 'Generate model list'
+
+      - powershell: |
+          $models = Get-Content "$(Build.SourcesDirectory)/temp/model_list.json" | ConvertFrom-Json
+          $total = $models.Count
+          if ($total -eq 0) {
+              Write-Host "All models already evaluated — nothing to run"
+              Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]{}"
+              Write-Host "##vso[task.setvariable variable=skipEval;isOutput=true]true"  # checked by the EvalModel job condition
+              return
+          }
+
+          $matrix = @{}
+          for ($i = 0; $i -lt $total; $i++) {
+              $m = $models[$i]
+              $slug = (($m.hf_id + '_' + $m.task) -replace '[^A-Za-z0-9]', '_')  # matrix key built from hf_id and task
+              $key = $slug
+              $suffix = 2
+              while ($matrix.ContainsKey($key)) {  # de-duplicate keys by appending _2, _3, ...
+                  $key = "${slug}_${suffix}"
+                  $suffix++
+              }
+              $matrix[$key] = @{
+                  hf_id = [string]$m.hf_id
+                  hf_task = [string]$m.task
+                  priority = [string]$m.priority
+                  model_type = [string]$m.model_type
+                  model_group = [string]$m.group
+              }
+          }
+
+          $json = $matrix | ConvertTo-Json -Compress -Depth 5
+          Write-Host "Prepared matrix for $total models"
+          Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]$json"  # consumed as the EvalModel strategy matrix
+        name: set_matrix
+        displayName: 'Create matrix variables'
+
+  - job: EvalModel_${{ parameters.agentSuffix }}  # one matrix leg per model emitted by the Prepare job
+    displayName: 'Eval (${{ parameters.agentSuffix }})'
+    pool:
+      name: modelkit-selfhost-pool
+      demands:
+        - Agent.Name -equals ${{ parameters.agentName }}
+    dependsOn: Prepare_${{ parameters.agentSuffix }}
+    condition: and(succeeded(), ne(dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_matrix.skipEval'], 'true'))
+    strategy:
+      maxParallel: 1  # legs run one at a time
+      matrix: $[ dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_matrix.modelMatrix'] ]
+    variables:
+      EVAL_DIR: $[ dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_output_dir.EVAL_DIR'] ]
+    timeoutInMinutes: 90
+    cancelTimeoutInMinutes: 2
+
+    steps:
+      - checkout: self
+        path: s
+        fetchDepth: 1
+        clean: false
+
+      - checkout: ModelKitArtifacts
+        path: artifacts
+        lfs: true
+        fetchDepth: 1
+        clean: false
+
+      - powershell: |
+          # Prepend uv's bin folder, then the .venv Scripts folder, to PATH for the following steps.
+          foreach ($entry in "$env:USERPROFILE\.local\bin", "$(Build.SourcesDirectory)\.venv\Scripts") {
+            Write-Host "##vso[task.prependpath]$entry"
+          }
+        displayName: 'Activate Python environment'
+
+      - powershell: |
+          Write-Host "Agent.BuildDirectory : $(Agent.BuildDirectory)"
+          Write-Host "Build.SourcesDirectory: $(Build.SourcesDirectory)"
+          Write-Host "Model: $(hf_id) / $(hf_task)"
+          Write-Host "Priority: $(priority)"
+          Write-Host "Output: $(EVAL_DIR)"
+
+          # Assemble the uv command line piece by piece; --task is added only when the matrix leg supplies one.
+          $evalCmd = "run", "--no-sync", "python", "scripts/e2e_eval/run_eval.py"
+          $evalCmd += "--hf-model", "$(hf_id)"
+          $evalCmd += "--output-dir", "$(EVAL_DIR)"
+          $evalCmd += "--device", "npu"
+          $evalCmd += "--continue"
+          $evalCmd += "--verbose"
+          $evalCmd += "--timeout", "${{ parameters.modelTimeout }}"
+          $evalCmd += "--no-report"
+          $evalCmd += "--clean-cache"
+          if ("$(hf_task)") {
+              $evalCmd += "--task", "$(hf_task)"
+          }
+
+          & uv @evalCmd
+          $evalExit = $LASTEXITCODE
+          # A failing model is reported as a warning only; the step always exits 0.
+          if ($evalExit -ne 0) {
+              Write-Warning "Model eval exited with code $evalExit for $(hf_id) / $(hf_task) (model failure — non-blocking)"
+          }
+          exit 0
+        displayName: 'Run eval for current model'
+        workingDirectory: $(Build.SourcesDirectory)
+
+  - job: Report_${{ parameters.agentSuffix }}  # builds and publishes the report from EVAL_DIR
+    displayName: 'Generate Eval Report (${{ parameters.agentSuffix }})'
+    pool:
+      name: modelkit-selfhost-pool
+      demands:
+        - Agent.Name -equals ${{ parameters.agentName }}
+    condition: always()  # runs regardless of how the jobs it depends on ended
+    dependsOn:
+      - Prepare_${{ parameters.agentSuffix }}
+      - EvalModel_${{ parameters.agentSuffix }}
+    variables:
+      EVAL_DIR: $[ dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_output_dir.EVAL_DIR'] ]
+
+    steps:
+      - checkout: self
+        path: s
+        fetchDepth: 1
+        clean: false
+
+      - checkout: ModelKitArtifacts
+        path: artifacts
+        lfs: true
+        fetchDepth: 1
+        clean: false
+
+      - powershell: |
+          # Prepend uv's bin folder, then the .venv Scripts folder, to PATH for the following steps.
+          foreach ($entry in "$env:USERPROFILE\.local\bin", "$(Build.SourcesDirectory)\.venv\Scripts") {
+            Write-Host "##vso[task.prependpath]$entry"
+          }
+        displayName: 'Activate Python environment'
+
+      # Generate the report from the results directory that the eval legs wrote to.
+      - script: |
+          uv run --no-sync python scripts/e2e_eval/generate_report.py --input-dir $(EVAL_DIR)
+        displayName: 'Generate evaluation report'
+        workingDirectory: $(Build.SourcesDirectory)
+
+      - task: PublishPipelineArtifact@1
+        displayName: 'Publish eval results as artifact'
+        inputs:
+          artifactName: EvalReport_${{ parameters.agentSuffix }}
+          targetPath: $(EVAL_DIR)

From 963aa61b9667c0e8e071f21d39e9b54d4997dfe3 Mon Sep 17 00:00:00 2001
From: Yue Sun <2015.apro@gmail.com>
Date: Tue, 14 Apr 2026 11:42:44 +0800
Subject: [PATCH 2/2] Update Modelkit E2E Test.yml for Azure Pipelines

---
 .pipelines/Modelkit E2E Test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pipelines/Modelkit E2E Test.yml b/.pipelines/Modelkit E2E Test.yml
index 1cd0b5103..16333235a 100644
--- a/.pipelines/Modelkit E2E Test.yml	
+++ b/.pipelines/Modelkit E2E Test.yml	
@@ -39,4 +39,4 @@ stages:
           agentSuffix: ov
           evalDate: ${{ parameters.evalDate }}
           continueRun: ${{ parameters.continueRun }}
-          modelTimeout: 3600
+          modelTimeout: 3600