From 0352e834de34fe4248b2a5ec3bc0901c0a566d1a Mon Sep 17 00:00:00 2001
From: yuesu
Date: Tue, 14 Apr 2026 11:25:20 +0800
Subject: [PATCH 1/2] add ov

---
Review notes (text between '---' and the diffstat is not part of the commit):
- e2e-eval-jobs.yml, 'Copy runtime check rules': diagnostics are now printed
  before Write-Error. The powershell step defaults to errorActionPreference
  'stop', so the listing that used to follow Write-Error never ran.
- e2e-eval-jobs.yml, 'Generate model list': arguments are built in $uvArgs
  instead of assigning to the PowerShell automatic variable $args.
- The diffstat and the new-file hunk header account for the six comment lines
  added to the template; the blob id on its 'index' line still refers to the
  originally submitted content.
- Indentation and blank lines were reconstructed from a whitespace-flattened
  copy. Line counts match the hunk headers; exact columns are a best effort.

 .pipelines/Modelkit E2E Test.yml       | 222 +++------------------
 .pipelines/templates/e2e-eval-jobs.yml | 264 +++++++++++++++++++++++++
 2 files changed, 296 insertions(+), 190 deletions(-)
 create mode 100644 .pipelines/templates/e2e-eval-jobs.yml

diff --git a/.pipelines/Modelkit E2E Test.yml b/.pipelines/Modelkit E2E Test.yml
index a42a84e0f..1cd0b5103 100644
--- a/.pipelines/Modelkit E2E Test.yml
+++ b/.pipelines/Modelkit E2E Test.yml
@@ -1,200 +1,42 @@
 trigger: none
 
+resources:
+  repositories:
+    - repository: ModelKitArtifacts
+      type: github
+      endpoint: github.com_yuesu_microsoft
+      name: gim-home/ModelKitArtifacts
+      ref: main
+
 parameters:
   - name: evalDate
-    displayName: 'Eval date (leave empty for today, e.g. 2026-04-01)'
+    displayName: 'Eval date (auto = today, e.g. 2026-04-01)'
     type: string
-    default: ''
+    default: 'auto'
   - name: continueRun
     displayName: 'Skip already-evaluated models (--continue)'
     type: boolean
     default: true
 
-variables:
-  evalOutputBase: 'c:/eval_results'
-
-jobs:
-  - job: Prepare
-    displayName: 'Prepare Eval Matrix'
-    pool:
-      name: modelkit-selfhost-pool
-      demands:
-        - Agent.Name -equals NPU-QNN
-
-    steps:
-      - checkout: self
-        clean: false
-        fetchDepth: 1
-
-      - powershell: |
-          $uvBin = "$env:USERPROFILE\.local\bin"
-          if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
-            Invoke-RestMethod https://astral.sh/uv/0.10.12/install.ps1 | Invoke-Expression
-            $env:PATH = "$uvBin;$env:PATH"
-          }
-          uv python install 3.10
-          Remove-Item -Recurse -Force "$(Build.SourcesDirectory)\.venv" -ErrorAction SilentlyContinue
-          uv venv $(Build.SourcesDirectory)\.venv --python 3.10
-          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
-          Write-Host "##vso[task.prependpath]$uvBin"
-          Write-Host "##vso[task.prependpath]$venvDir"
-        displayName: 'Install uv 0.10.12 and Python'
-
-      - script: python --version
-        displayName: 'Check Python version'
-
-      - task: PipAuthenticate@1
-        inputs:
-          artifactFeeds: 'windows.ai.toolkit/Modelkit'
-        displayName: 'Authenticate pip with Azure Artifacts'
-
-      - script: uv pip install -e .[dev]
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Install dependencies'
-
-      - powershell: |
-          $evalDate = '${{ parameters.evalDate }}'
-          if (-not $evalDate) { $evalDate = Get-Date -Format 'yyyy-MM-dd' }
-          $dir = "$(evalOutputBase)/$evalDate"
-          Write-Host "##vso[task.setvariable variable=EVAL_DIR;isOutput=true]$dir"
-          Write-Host "Eval output directory: $dir"
-        name: set_output_dir
-        displayName: 'Set eval output directory'
-
-      - powershell: |
-          $args = @(
-            "run", "python", "scripts/e2e_eval/run_eval.py",
-            "--list-json", "temp/model_list.json",
-            "--device", "npu"
-          )
-          if ('${{ parameters.continueRun }}' -eq 'True') {
-            $args += @("--continue", "--output-dir", "$(set_output_dir.EVAL_DIR)")
-          }
-          & uv @args
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Generate model list'
-
-      - powershell: |
-          $models = Get-Content "$(Build.SourcesDirectory)/temp/model_list.json" | ConvertFrom-Json
-          $total = $models.Count
-          if ($total -eq 0) {
-            Write-Host "All models already evaluated — nothing to run"
-            Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]{}"
-            Write-Host "##vso[task.setvariable variable=skipEval;isOutput=true]true"
-            return
-          }
-
-          $matrix = @{}
-          for ($i = 0; $i -lt $total; $i++) {
-            $m = $models[$i]
-            $slug = (($m.hf_id + '_' + $m.task) -replace '[^A-Za-z0-9]', '_')
-            $key = $slug
-            $suffix = 2
-            while ($matrix.ContainsKey($key)) {
-              $key = "${slug}_${suffix}"
-              $suffix++
-            }
-            $matrix[$key] = @{
-              hf_id = [string]$m.hf_id
-              hf_task = [string]$m.task
-              priority = [string]$m.priority
-              model_type = [string]$m.model_type
-              model_group = [string]$m.group
-            }
-          }
-
-          $json = $matrix | ConvertTo-Json -Compress -Depth 5
-          Write-Host "Prepared matrix for $total models"
-          Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]$json"
-        name: set_matrix
-        displayName: 'Create matrix variables'
-
-  - job: EvalModel
-    displayName: 'Eval'
-    dependsOn: Prepare
-    condition: and(succeeded(), ne(dependencies.Prepare.outputs['set_matrix.skipEval'], 'true'))
-    timeoutInMinutes: 90
-    cancelTimeoutInMinutes: 2
-    pool:
-      name: modelkit-selfhost-pool
-      demands:
-        - Agent.Name -equals NPU-QNN
-    variables:
-      EVAL_DIR: $[ dependencies.Prepare.outputs['set_output_dir.EVAL_DIR'] ]
-    strategy:
-      maxParallel: 1
-      matrix: $[ dependencies.Prepare.outputs['set_matrix.modelMatrix'] ]
-
-    steps:
-      - checkout: none
-
-      - powershell: |
-          $uvBin = "$env:USERPROFILE\.local\bin"
-          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
-          Write-Host "##vso[task.prependpath]$uvBin"
-          Write-Host "##vso[task.prependpath]$venvDir"
-        displayName: 'Activate Python environment'
-
-      - powershell: |
-          Write-Host "Model: $(hf_id) / $(hf_task)"
-          Write-Host "Priority: $(priority)"
-          Write-Host "Output: $(EVAL_DIR)"
-
-          $uvArgs = @(
-            "run", "--no-sync", "python", "scripts/e2e_eval/run_eval.py",
-            "--hf-model", "$(hf_id)",
-            "--output-dir", "$(EVAL_DIR)",
-            "--device", "npu",
-            "--continue",
-            "--verbose",
-            "--timeout", "1800",
-            "--no-report",
-            "--clean-cache"
-          )
-          if ("$(hf_task)") {
-            $uvArgs += @("--task", "$(hf_task)")
-          }
-
-          & uv @uvArgs
-          $evalExit = $LASTEXITCODE
-          if ($evalExit -ne 0) {
-            Write-Warning "Model eval exited with code $evalExit for $(hf_id) / $(hf_task) (model failure — non-blocking)"
-          }
-          exit 0
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Run eval for current model'
-
-  - job: Report
-    displayName: 'Generate Eval Report'
-    dependsOn:
-      - Prepare
-      - EvalModel
-    condition: always()
-    pool:
-      name: modelkit-selfhost-pool
-      demands:
-        - Agent.Name -equals NPU-QNN
-    variables:
-      EVAL_DIR: $[ dependencies.Prepare.outputs['set_output_dir.EVAL_DIR'] ]
-
-    steps:
-      - checkout: none
-
-      - powershell: |
-          $uvBin = "$env:USERPROFILE\.local\bin"
-          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
-          Write-Host "##vso[task.prependpath]$uvBin"
-          Write-Host "##vso[task.prependpath]$venvDir"
-        displayName: 'Activate Python environment'
-
-      - script: >
-          uv run --no-sync python scripts/e2e_eval/generate_report.py
-          --input-dir $(EVAL_DIR)
-        workingDirectory: $(Build.SourcesDirectory)
-        displayName: 'Generate evaluation report'
-
-      - task: PublishPipelineArtifact@1
-        inputs:
-          targetPath: $(EVAL_DIR)
-          artifactName: EvalReport
-        displayName: 'Publish eval results as artifact'
+stages:
+  - stage: NPU_QNN
+    displayName: 'E2E Eval — NPU-QNN'
+    jobs:
+      - template: templates/e2e-eval-jobs.yml
+        parameters:
+          agentName: NPU-QNN
+          agentSuffix: qnn
+          evalDate: ${{ parameters.evalDate }}
+          continueRun: ${{ parameters.continueRun }}
+
+  - stage: NPU_OV
+    displayName: 'E2E Eval — NPU-OV'
+    dependsOn: []
+    jobs:
+      - template: templates/e2e-eval-jobs.yml
+        parameters:
+          agentName: NPU-OV
+          agentSuffix: ov
+          evalDate: ${{ parameters.evalDate }}
+          continueRun: ${{ parameters.continueRun }}
+          modelTimeout: 3600
diff --git a/.pipelines/templates/e2e-eval-jobs.yml b/.pipelines/templates/e2e-eval-jobs.yml
new file mode 100644
index 000000000..9424d118a
--- /dev/null
+++ b/.pipelines/templates/e2e-eval-jobs.yml
@@ -0,0 +1,264 @@
+# Reusable Prepare -> Eval -> Report job chain for one self-hosted NPU agent.
+# 'Modelkit E2E Test.yml' instantiates it once per agent; agentSuffix keeps job
+# names, output directories and artifact names distinct between instances.
+parameters:
+  - name: agentName
+    type: string
+  - name: agentSuffix
+    type: string
+  - name: evalOutputBase
+    type: string
+    default: 'c:/eval_results'
+  - name: evalDate
+    type: string
+    default: ''
+  - name: continueRun
+    type: boolean
+    default: true
+  - name: modelTimeout
+    type: number
+    default: 1800
+
+jobs:
+  - job: Prepare_${{ parameters.agentSuffix }}
+    displayName: 'Prepare Eval Matrix (${{ parameters.agentSuffix }})'
+    pool:
+      name: modelkit-selfhost-pool
+      demands:
+        - Agent.Name -equals ${{ parameters.agentName }}
+
+    steps:
+      - checkout: self
+        clean: false
+        fetchDepth: 1
+        path: s
+
+      - checkout: ModelKitArtifacts
+        fetchDepth: 1
+        lfs: true
+        path: artifacts
+
+      - powershell: |
+          # The powershell step runs with errorActionPreference 'stop' by default, so
+          # Write-Error ends the script: print diagnostics first, raise the error last.
+          Write-Host "Agent.BuildDirectory : $(Agent.BuildDirectory)"
+          Write-Host "Build.SourcesDirectory: $(Build.SourcesDirectory)"
+          $repoDir = "$(Agent.BuildDirectory)/artifacts"
+          if (-not (Test-Path "$repoDir/op_check_results/rules")) {
+            $repoDir = "$(Agent.BuildDirectory)/ModelKitArtifacts"
+          }
+          $src = "$repoDir/op_check_results/rules"
+          $dst = "$(Build.SourcesDirectory)/src/winml/modelkit/analyze/rules/runtime_check_rules"
+          if (Test-Path $src) {
+            New-Item -ItemType Directory -Path $dst -Force | Out-Null
+            $zips = Copy-Item "$src/*.zip" $dst -Force -PassThru
+            Write-Host "Copied $($zips.Count) rule zips to $dst"
+            # Verify files are real data, not LFS pointers (~130 bytes)
+            $bad = $zips | Where-Object { $_.Length -lt 1024 }
+            if ($bad) {
+              $bad | ForEach-Object { Write-Host "  $($_.Name): $($_.Length) bytes" }
+              Write-Error "Suspiciously small rule zips (likely unresolved LFS pointers): $($bad.Name -join ', ')"
+              exit 1
+            }
+            $zips | ForEach-Object { Write-Host "  $($_.Name): $([math]::Round($_.Length / 1KB, 1)) KB" }
+          } else {
+            Write-Host "Contents of $repoDir :"
+            Get-ChildItem $repoDir -Recurse -Depth 2 -ErrorAction SilentlyContinue | ForEach-Object { Write-Host "  $($_.FullName)" }
+            Write-Error "Rules source not found at: $src"
+            exit 1
+          }
+        displayName: 'Copy runtime check rules from ModelKitArtifacts'
+
+      - powershell: |
+          $uvBin = "$env:USERPROFILE\.local\bin"
+          if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
+            Invoke-RestMethod https://astral.sh/uv/0.10.12/install.ps1 | Invoke-Expression
+            $env:PATH = "$uvBin;$env:PATH"
+          }
+          uv python install 3.10
+          Remove-Item -Recurse -Force "$(Build.SourcesDirectory)\.venv" -ErrorAction SilentlyContinue
+          uv venv $(Build.SourcesDirectory)\.venv --python 3.10
+          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
+          Write-Host "##vso[task.prependpath]$uvBin"
+          Write-Host "##vso[task.prependpath]$venvDir"
+        displayName: 'Install uv 0.10.12 and Python'
+
+      - script: python --version
+        displayName: 'Check Python version'
+
+      - task: PipAuthenticate@1
+        inputs:
+          artifactFeeds: 'windows.ai.toolkit/Modelkit'
+        displayName: 'Authenticate pip with Azure Artifacts'
+
+      - script: uv pip install -e .[dev]
+        workingDirectory: $(Build.SourcesDirectory)
+        displayName: 'Install dependencies'
+
+      - powershell: |
+          $evalDate = '${{ parameters.evalDate }}'
+          if (-not $evalDate -or $evalDate -eq 'auto') { $evalDate = Get-Date -Format 'yyyy-MM-dd' }
+          $dir = "${{ parameters.evalOutputBase }}/$evalDate/${{ parameters.agentSuffix }}"
+          Write-Host "##vso[task.setvariable variable=EVAL_DIR;isOutput=true]$dir"
+          Write-Host "Eval output directory: $dir"
+        name: set_output_dir
+        displayName: 'Set eval output directory'
+
+      - powershell: |
+          # Named $uvArgs on purpose: $args is a PowerShell automatic variable.
+          $uvArgs = @(
+            "run", "python", "scripts/e2e_eval/run_eval.py",
+            "--list-json", "temp/model_list.json",
+            "--device", "npu"
+          )
+          if ('${{ parameters.continueRun }}' -eq 'True') {
+            $uvArgs += @("--continue", "--output-dir", "$(set_output_dir.EVAL_DIR)")
+          }
+          & uv @uvArgs
+        workingDirectory: $(Build.SourcesDirectory)
+        displayName: 'Generate model list'
+
+      - powershell: |
+          $models = Get-Content "$(Build.SourcesDirectory)/temp/model_list.json" | ConvertFrom-Json
+          $total = $models.Count
+          if ($total -eq 0) {
+            Write-Host "All models already evaluated — nothing to run"
+            Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]{}"
+            Write-Host "##vso[task.setvariable variable=skipEval;isOutput=true]true"
+            return
+          }
+
+          $matrix = @{}
+          for ($i = 0; $i -lt $total; $i++) {
+            $m = $models[$i]
+            $slug = (($m.hf_id + '_' + $m.task) -replace '[^A-Za-z0-9]', '_')
+            $key = $slug
+            $suffix = 2
+            while ($matrix.ContainsKey($key)) {
+              $key = "${slug}_${suffix}"
+              $suffix++
+            }
+            $matrix[$key] = @{
+              hf_id = [string]$m.hf_id
+              hf_task = [string]$m.task
+              priority = [string]$m.priority
+              model_type = [string]$m.model_type
+              model_group = [string]$m.group
+            }
+          }
+
+          $json = $matrix | ConvertTo-Json -Compress -Depth 5
+          Write-Host "Prepared matrix for $total models"
+          Write-Host "##vso[task.setvariable variable=modelMatrix;isOutput=true]$json"
+        name: set_matrix
+        displayName: 'Create matrix variables'
+
+  - job: EvalModel_${{ parameters.agentSuffix }}
+    displayName: 'Eval (${{ parameters.agentSuffix }})'
+    dependsOn: Prepare_${{ parameters.agentSuffix }}
+    condition: and(succeeded(), ne(dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_matrix.skipEval'], 'true'))
+    timeoutInMinutes: 90
+    cancelTimeoutInMinutes: 2
+    pool:
+      name: modelkit-selfhost-pool
+      demands:
+        - Agent.Name -equals ${{ parameters.agentName }}
+    variables:
+      EVAL_DIR: $[ dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_output_dir.EVAL_DIR'] ]
+    strategy:
+      maxParallel: 1
+      matrix: $[ dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_matrix.modelMatrix'] ]
+
+    steps:
+      - checkout: self
+        clean: false
+        fetchDepth: 1
+        path: s
+
+      - checkout: ModelKitArtifacts
+        clean: false
+        fetchDepth: 1
+        lfs: true
+        path: artifacts
+
+      - powershell: |
+          $uvBin = "$env:USERPROFILE\.local\bin"
+          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
+          Write-Host "##vso[task.prependpath]$uvBin"
+          Write-Host "##vso[task.prependpath]$venvDir"
+        displayName: 'Activate Python environment'
+
+      - powershell: |
+          Write-Host "Agent.BuildDirectory : $(Agent.BuildDirectory)"
+          Write-Host "Build.SourcesDirectory: $(Build.SourcesDirectory)"
+          Write-Host "Model: $(hf_id) / $(hf_task)"
+          Write-Host "Priority: $(priority)"
+          Write-Host "Output: $(EVAL_DIR)"
+
+          $uvArgs = @(
+            "run", "--no-sync", "python", "scripts/e2e_eval/run_eval.py",
+            "--hf-model", "$(hf_id)",
+            "--output-dir", "$(EVAL_DIR)",
+            "--device", "npu",
+            "--continue",
+            "--verbose",
+            "--timeout", "${{ parameters.modelTimeout }}",
+            "--no-report",
+            "--clean-cache"
+          )
+          if ("$(hf_task)") {
+            $uvArgs += @("--task", "$(hf_task)")
+          }
+
+          & uv @uvArgs
+          $evalExit = $LASTEXITCODE
+          if ($evalExit -ne 0) {
+            Write-Warning "Model eval exited with code $evalExit for $(hf_id) / $(hf_task) (model failure — non-blocking)"
+          }
+          exit 0
+        workingDirectory: $(Build.SourcesDirectory)
+        displayName: 'Run eval for current model'
+
+  - job: Report_${{ parameters.agentSuffix }}
+    displayName: 'Generate Eval Report (${{ parameters.agentSuffix }})'
+    dependsOn:
+      - Prepare_${{ parameters.agentSuffix }}
+      - EvalModel_${{ parameters.agentSuffix }}
+    condition: always()
+    pool:
+      name: modelkit-selfhost-pool
+      demands:
+        - Agent.Name -equals ${{ parameters.agentName }}
+    variables:
+      EVAL_DIR: $[ dependencies.Prepare_${{ parameters.agentSuffix }}.outputs['set_output_dir.EVAL_DIR'] ]
+
+    steps:
+      - checkout: self
+        clean: false
+        fetchDepth: 1
+        path: s
+
+      - checkout: ModelKitArtifacts
+        clean: false
+        fetchDepth: 1
+        lfs: true
+        path: artifacts
+
+      - powershell: |
+          $uvBin = "$env:USERPROFILE\.local\bin"
+          $venvDir = "$(Build.SourcesDirectory)\.venv\Scripts"
+          Write-Host "##vso[task.prependpath]$uvBin"
+          Write-Host "##vso[task.prependpath]$venvDir"
+        displayName: 'Activate Python environment'
+
+      - script: >
+          uv run --no-sync python scripts/e2e_eval/generate_report.py
+          --input-dir $(EVAL_DIR)
+        workingDirectory: $(Build.SourcesDirectory)
+        displayName: 'Generate evaluation report'
+
+      - task: PublishPipelineArtifact@1
+        inputs:
+          targetPath: $(EVAL_DIR)
+          artifactName: EvalReport_${{ parameters.agentSuffix }}
+        displayName: 'Publish eval results as artifact'

From 963aa61b9667c0e8e071f21d39e9b54d4997dfe3 Mon Sep 17 00:00:00 2001
From: Yue Sun <2015.apro@gmail.com>
Date: Tue, 14 Apr 2026 11:42:44 +0800
Subject: [PATCH 2/2] Update Modelkit E2E Test.yml for Azure Pipelines

---
Review note: the removed and added lines below differ only in whitespace that
the flattened copy did not preserve (possibly the end-of-file newline; to be
confirmed against the original commit).

 .pipelines/Modelkit E2E Test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pipelines/Modelkit E2E Test.yml b/.pipelines/Modelkit E2E Test.yml
index 1cd0b5103..16333235a 100644
--- a/.pipelines/Modelkit E2E Test.yml
+++ b/.pipelines/Modelkit E2E Test.yml
@@ -39,4 +39,4 @@ stages:
           agentSuffix: ov
           evalDate: ${{ parameters.evalDate }}
           continueRun: ${{ parameters.continueRun }}
-          modelTimeout: 3600
+          modelTimeout: 3600