Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/skills/ci-pipeline-monitor/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ temp/
# Intermediate JSON output files (piped between scripts)
failing_builds.json
failed_tests.json
cached_labels.json
Comment thread
kg marked this conversation as resolved.
35 changes: 23 additions & 12 deletions .github/skills/ci-pipeline-monitor/scripts/update_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,21 @@ def _one_failure(self, fail, go):
gh_issue_command.append("create")
creating_new_issue = True
Comment on lines 87 to 88

if fail["labels"]:
if creating_new_issue:
for label in fail["labels"].split(','):
stripped_label = label.strip()
if stripped_label:
gh_issue_command.append('--label')
gh_issue_command.append(stripped_label)

# Title / Labels / Milestone for issue filing
if creating_new_issue:
if fail["labels"]:
# TODO: Find a way to make a case-insensitive set so we can avoid normalizing the labels to lowercase
label_set = set([l.strip().lower() for l in fail["labels"].split(',')])
else:
label_set = set()
label_set.add("blocking-clean-ci-optional")

for label in sorted(label_set):
if len(label) == 0:
continue
gh_issue_command.append('--label')
gh_issue_command.append(label)

test_name = fail["test_name"]
if creating_new_issue:
gh_issue_command.append('--title')
Expand Down Expand Up @@ -123,10 +129,15 @@ def _one_failure(self, fail, go):
))

# Filter affected list to build IDs that don't have stamp files
affected = [ap for ap in affected if not os.path.exists(self.get_stamp_path(ap))]

if len(affected) == 0:
print("All affected builds have stamps; skipping.")
unstamped_affected = [ap for ap in affected if not os.path.exists(self.get_stamp_path(ap))]

# If the database tells us to add a comment instead of creating a new issue, ensure we don't generate redundant
# comments for builds that we've already commented on. When creating new issues we can't perform this check,
# because the database often wants us to generate multiple issues for a given build ID.
# TODO: Investigate more deeply why we can't use stamp filtering on new issues; it seems like it should work.
if not creating_new_issue and (len(unstamped_affected) == 0):
affected_build_list = ", ".join([str(ap["build_id"]) for ap in affected])
print(f"All affected builds ({affected_build_list}) have stamps; skipping.")
return

out.append(f"**Failed in ({len(affected)}):**")
Expand Down
61 changes: 59 additions & 2 deletions .github/skills/ci-pipeline-monitor/scripts/validate_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import urllib.parse
import urllib.request

# GitHub REST endpoint queried for the labels defined on the dotnet/runtime repository.
LABELS_API_ENDPOINT = "https://api.github.com/repos/dotnet/runtime/labels"
# On-disk cache of the fetched label names, kept in the same directory as this script.
# The path is built from the script's directory instead of joining ".." onto __file__:
# POSIX systems refuse to traverse ".." through a regular file (ENOTDIR), so a path of
# the form "<script>.py/../cached_labels.json" cannot be opened there.
LABELS_CACHE_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cached_labels.json")

def check(name, passed, detail="", warn_only=False):
if not passed and warn_only:
Expand Down Expand Up @@ -574,6 +576,61 @@ def _build_sig(error_message, stack_trace):
if not ok:
failures += 1

# 16h. Every failure's suggested labels are valid labels on the dotnet/runtime repo
# Otherwise, update_github will fail to file the issue

Comment thread
kg marked this conversation as resolved.
total += 1
all_label_names = set()
try:
# TODO: If dotnet/runtime ever has more than 1000 labels (10 pages of 100), increase this page limit. At the time
# of script authoring we have around 300, so this is more than enough.
for page_index in range(1, 11):
req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={
"Accept": "application/vnd.github+json",
"User-Agent": "ci-pipeline-monitor-validator",
})
with urllib.request.urlopen(req, timeout=15) as resp:
data = json.loads(resp.read())
page_label_names = set([item["name"] for item in data])
# Don't bother querying additional empty pages after this one.
if len(page_label_names) <= 0:
break

for name in page_label_names:
all_label_names.add(name.lower())

with open(LABELS_CACHE_FILE, "w") as f:
f.write(json.dumps(sorted(all_label_names)))
except urllib.error.HTTPError:
print(" [WARN] Failed to fetch labels list from GitHub due to an HTTP error. Loading cached labels list.")
with open(LABELS_CACHE_FILE) as f:
labels_json = f.read()
labels_list = json.loads(labels_json)
all_label_names = set(labels_list)
Comment thread
kg marked this conversation as resolved.

Comment thread
kg marked this conversation as resolved.
all_failure_rows = conn.execute("""
SELECT id, labels FROM failures
""").fetchall()
bad_labels = []
for failure_row in all_failure_rows:
raw_labels = failure_row["labels"]
failure_labels = raw_labels.split(',') if raw_labels else []
for failure_label in failure_labels:
failure_label = failure_label.strip().lower()
if not failure_label:
continue
if not (failure_label in all_label_names):
print(f" [FAIL] Invalid label '{failure_label}' for failure {failure_row['id']}")
bad_labels.append(failure_row)

ok = check("All failure labels are valid labels from dotnet/runtime repo",
len(bad_labels) == 0,
f"{len(bad_labels)} label(s) were invalid (see above)" if len(bad_labels) else "")
if not ok:
print("Full set of valid labels follows:")
print(", ".join(sorted(all_label_names)))
failures += 1

# Report checks (only if --report provided)
if args.report:
print("\n=== Report Sanity ===")
Expand Down Expand Up @@ -736,10 +793,10 @@ def _build_sig(error_message, stack_trace):
warn_str = f", {warnings} warnings" if warnings else ""
print(f"Results: {passed}/{total} passed, {failures} failed{warn_str}")
if failures:
print(" VALIDATION FAILED — fix issues before publishing report")
print("/!\\ VALIDATION FAILED — fix issues before publishing report /!\\")
sys.exit(1)
else:
print("ALL CHECKS PASSED")
print("ALL CHECKS PASSED")
sys.exit(0)


Expand Down