Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -262,5 +262,5 @@ specs/
.claude/settings.local.json
/sa_eval_results/

# Runtime check rule zips (hosted in external repo)
src/winml/modelkit/analyze/rules/runtime_check_rules/*.zip
# Runtime check rule artifacts (hosted in external repo)
src/winml/modelkit/analyze/rules/runtime_check_rules/**/*.parquet
18 changes: 10 additions & 8 deletions .pipelines/modelkit-official-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,17 @@ extends:
displayName: 'Check Python version'

- powershell: |
$rulesDir = "$(Build.SourcesDirectory)\ModelKitArtifacts\rules_zip"
$rulesDir = "$(Build.SourcesDirectory)\ModelKitArtifacts\rules"
$destDir = "$(Build.SourcesDirectory)\src\winml\modelkit\analyze\rules\runtime_check_rules"
$outDir = "$(ob_outputDirectory)"
New-Item -ItemType Directory -Path $outDir -Force | Out-Null
$zips = Get-ChildItem "$rulesDir\*.zip"
Write-Host "Found $($zips.Count) rule zips"
$zips | Copy-Item -Destination $destDir
$zips | Copy-Item -Destination $outDir
Write-Host "Copied $($zips.Count) rule zips to $destDir and $outDir"
$parquets = Get-ChildItem "$rulesDir\*.parquet" -Recurse
Write-Host "Found $($parquets.Count) rule parquet file(s)"
foreach ($parquet in $parquets) {
$relativePath = $parquet.FullName.Substring($rulesDir.Length).TrimStart('\\')
$targetPath = Join-Path $destDir $relativePath
New-Item -ItemType Directory -Path (Split-Path $targetPath -Parent) -Force | Out-Null
Copy-Item $parquet.FullName -Destination $targetPath -Force
}
Write-Host "Copied $($parquets.Count) rule parquet file(s) to $destDir"
displayName: 'Copy runtime check rules'

- task: PipAuthenticate@1
Expand Down
8 changes: 4 additions & 4 deletions .pipelines/modelkit-release-github.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ extends:
$outDir = "$(ob_outputDirectory)"
New-Item -ItemType Directory -Path $outDir -Force | Out-Null
$wheels = Get-ChildItem "$artifactDir" -Filter "*.whl" -Recurse
$zips = Get-ChildItem "$artifactDir" -Filter "*.zip" -Recurse
Write-Host "Found $($wheels.Count) wheel(s) and $($zips.Count) zip(s)"
$parquets = Get-ChildItem "$artifactDir" -Filter "*.parquet" -Recurse
Write-Host "Found $($wheels.Count) wheel(s) and $($parquets.Count) parquet file(s)"
$wheels | Copy-Item -Destination $outDir
$zips | Copy-Item -Destination $outDir
$parquets | Copy-Item -Destination $outDir
displayName: 'Stage release assets'

- task: GitHubRelease@1
Expand All @@ -106,7 +106,7 @@ extends:
title: 'ModelKit $(ReleaseTag)'
assets: |
$(ob_outputDirectory)\*.whl
$(ob_outputDirectory)\*.zip
$(ob_outputDirectory)\*.parquet
isDraft: false
isPreRelease: true
addChangeLog: false
20 changes: 13 additions & 7 deletions .pipelines/templates/e2e-eval-jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,29 @@ jobs:
Write-Host "Agent.BuildDirectory : $(Agent.BuildDirectory)"
Write-Host "Build.SourcesDirectory: $(Build.SourcesDirectory)"
$repoDir = "$(Agent.BuildDirectory)/artifacts"
if (-not (Test-Path "$repoDir/rules_zip")) {
if (-not (Test-Path "$repoDir/rules")) {
$repoDir = "$(Agent.BuildDirectory)/ModelKitArtifacts"
}
$src = "$repoDir/rules_zip"
$src = "$repoDir/rules"
$dst = "$(Build.SourcesDirectory)/src/winml/modelkit/analyze/rules/runtime_check_rules"
if (Test-Path $src) {
New-Item -ItemType Directory -Path $dst -Force | Out-Null
$zips = Copy-Item "$src/*.zip" $dst -Force -PassThru
Write-Host "Copied $($zips.Count) rule zips to $dst"
$parquets = Get-ChildItem "$src" -Filter "*.parquet" -Recurse
foreach ($parquet in $parquets) {
$relativePath = $parquet.FullName.Substring($src.Length).TrimStart('\\')
$targetPath = Join-Path $dst $relativePath
New-Item -ItemType Directory -Path (Split-Path $targetPath -Parent) -Force | Out-Null
Copy-Item $parquet.FullName -Destination $targetPath -Force
}
Write-Host "Copied $($parquets.Count) rule parquet file(s) to $dst"
# Verify files are real data, not LFS pointers (~130 bytes)
$bad = $zips | Where-Object { $_.Length -lt 1024 }
$bad = $parquets | Where-Object { $_.Length -lt 1024 }
if ($bad) {
Write-Error "The following zip files are suspiciously small (likely unresolved LFS pointers):"
Write-Error "The following parquet files are suspiciously small (likely unresolved LFS pointers):"
$bad | ForEach-Object { Write-Host " $($_.Name): $($_.Length) bytes" }
exit 1
}
$zips | ForEach-Object { Write-Host " $($_.Name): $([math]::Round($_.Length / 1KB, 1)) KB" }
$parquets | ForEach-Object { Write-Host " $($_.Name): $([math]::Round($_.Length / 1KB, 1)) KB" }
} else {
Write-Error "Rules source not found at: $src"
Write-Host "Contents of $repoDir :"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ include = [ "winml", "winml.*" ]
"winml.modelkit.data" = [ "hub_models.json" ]
"winml.modelkit.analyze" = [
"rules/**/*.json",
"rules/**/*.zip",
"rules/**/*.parquet",
"runtime_checker/need_rerun_errors.json",
]
"winml.modelkit.export" = [
Expand Down
42 changes: 25 additions & 17 deletions scripts/download_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""Download runtime check rule zips from gim-home/ModelKitArtifacts.
"""Download runtime check rule parquet files from gim-home/ModelKitArtifacts.

For Microsoft internal use only. Requires gh CLI authenticated with an account
that has access to the gim-home org.

External contributors should instead download rule zips from the latest
External contributors should instead download rule parquet files from the latest
WinML-ModelKit GitHub release; see
src/winml/modelkit/analyze/rules/runtime_check_rules/README.md.
Comment thread
fangyangci marked this conversation as resolved.

Expand All @@ -26,7 +26,7 @@


SOURCE_REPO = "gim-home/ModelKitArtifacts"
SOURCE_PATH = "rules_zip"
SOURCE_PATH = "rules"
RULES_DIR = (
Path(__file__).resolve().parent.parent
/ "src"
Expand All @@ -49,9 +49,9 @@ def _get_clone_url(account: str | None = None) -> str:
" GH_ACCOUNT=<account> uv run python scripts/download_rules.py\n"
"\n"
"This script is for Microsoft internal use (gim-home org access required).\n"
"External contributors: download rule zips from the latest GitHub release:\n"
" gh release download --repo microsoft/WinML-ModelKit --pattern '*.zip' \\\n"
Comment thread
fangyangci marked this conversation as resolved.
" --dir src/winml/modelkit/analyze/rules/runtime_check_rules",
"External contributors: download rule parquet files from the latest "
"GitHub release; see\n"
" src/winml/modelkit/analyze/rules/runtime_check_rules/README.md",
file=sys.stderr,
)
sys.exit(1)
Expand Down Expand Up @@ -110,13 +110,17 @@ def _sparse_clone(clone_url: str, dest: Path) -> bool:


def main() -> None:
parser = argparse.ArgumentParser(description="Download runtime check rule zips")
parser.add_argument("--force", action="store_true", help="Re-download all zips")
parser = argparse.ArgumentParser(description="Download runtime check rule parquet files")
parser.add_argument("--force", action="store_true", help="Re-download all parquet files")
parser.add_argument("--account", type=str, help="gh CLI account with access to gim-home org")
args = parser.parse_args()

clone_url = _get_clone_url(args.account)
existing = set() if args.force else {f.name for f in RULES_DIR.glob("*.zip")}
existing = (
set()
if args.force
else {str(path.relative_to(RULES_DIR).as_posix()) for path in RULES_DIR.rglob("*.parquet")}
)

with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp) / "repo"
Expand All @@ -131,23 +135,27 @@ def main() -> None:
sys.exit(1)

src_dir = tmp_path / SOURCE_PATH
zips = list(src_dir.glob("*.zip"))
parquet_files = sorted(src_dir.rglob("*.parquet"))

if not zips:
print(f"No zip files found in {SOURCE_REPO}/{SOURCE_PATH}")
if not parquet_files:
print(f"No parquet files found in {SOURCE_REPO}/{SOURCE_PATH}")
sys.exit(1)

RULES_DIR.mkdir(parents=True, exist_ok=True)
copied = 0
for zip_file in zips:
if zip_file.name in existing:
for parquet_file in parquet_files:
rel_path = parquet_file.relative_to(src_dir)
rel_key = rel_path.as_posix()
if rel_key in existing:
continue
shutil.copy2(zip_file, RULES_DIR / zip_file.name)
destination = RULES_DIR / rel_path
destination.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(parquet_file, destination)
copied += 1

total = len(zips)
total = len(parquet_files)
skipped = total - copied
size_mb = sum((RULES_DIR / z.name).stat().st_size for z in zips) / 1024 / 1024
size_mb = sum(path.stat().st_size for path in parquet_files) / 1024 / 1024
print(f"Done. Copied: {copied}, skipped: {skipped}, total: {total} ({size_mb:.0f} MB)")


Expand Down
94 changes: 0 additions & 94 deletions scripts/materialize_rules_zip.py

This file was deleted.

Loading
Loading