diff --git a/.github/skills/ci-pipeline-monitor/.gitignore b/.github/skills/ci-pipeline-monitor/.gitignore
index ff595e326d8e23..336d4ad83fb8af 100644
--- a/.github/skills/ci-pipeline-monitor/.gitignore
+++ b/.github/skills/ci-pipeline-monitor/.gitignore
@@ -13,3 +13,4 @@ temp/
 # Intermediate JSON output files (piped between scripts)
 failing_builds.json
 failed_tests.json
+cached_labels.json
diff --git a/.github/skills/ci-pipeline-monitor/scripts/update_github.py b/.github/skills/ci-pipeline-monitor/scripts/update_github.py
index a18e96e1c6f2ea..180ff83b87e278 100644
--- a/.github/skills/ci-pipeline-monitor/scripts/update_github.py
+++ b/.github/skills/ci-pipeline-monitor/scripts/update_github.py
@@ -87,15 +87,21 @@ def _one_failure(self, fail, go):
             gh_issue_command.append("create")
             creating_new_issue = True
 
-        if fail["labels"]:
-            if creating_new_issue:
-                for label in fail["labels"].split(','):
-                    stripped_label = label.strip()
-                    if stripped_label:
-                        gh_issue_command.append('--label')
-                        gh_issue_command.append(stripped_label)
-
         # Title / Labels / Milestone for issue filing
+        if creating_new_issue:
+            if fail["labels"]:
+                # TODO: Find a way to make a case-insensitive set so we can avoid normalizing the labels to lowercase
+                label_set = {lbl.strip().lower() for lbl in fail["labels"].split(',')}
+            else:
+                label_set = set()
+            label_set.add("blocking-clean-ci-optional")
+
+            for label in sorted(label_set):
+                if len(label) == 0:
+                    continue
+                gh_issue_command.append('--label')
+                gh_issue_command.append(label)
+
         test_name = fail["test_name"]
         if creating_new_issue:
             gh_issue_command.append('--title')
@@ -123,10 +129,15 @@ def _one_failure(self, fail, go):
         ))
 
         # Filter affected list to build IDs that don't have stamp files
-        affected = [ap for ap in affected if not os.path.exists(self.get_stamp_path(ap))]
-
-        if len(affected) == 0:
-            print("All affected builds have stamps; skipping.")
+        unstamped_affected = [ap for ap in affected if not os.path.exists(self.get_stamp_path(ap))]
+
+        # If the database tells us to add a comment instead of creating a new issue, ensure we don't generate redundant
+        # comments for builds that we've already generated a comment for before. When creating new issues we can't
+        # perform this check because the database often wants us to generate multiple issues for a given build ID.
+        # TODO: Investigate why we can't use stamp filtering on new issues more deeply, it seems like it should work.
+        if not creating_new_issue and (len(unstamped_affected) == 0):
+            affected_build_list = ", ".join([str(ap["build_id"]) for ap in affected])
+            print(f"All affected builds ({affected_build_list}) have stamps; skipping.")
             return
 
         out.append(f"**Failed in ({len(affected)}):**")
diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py
index b2c8d59aba75ba..3070b1cc60f1cb 100644
--- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py
+++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py
@@ -18,6 +18,9 @@
 import urllib.parse
 import urllib.request
 
+LABELS_API_ENDPOINT = "https://api.github.com/repos/dotnet/runtime/labels"
+# The cache sits next to this script. __file__ names a file, so join onto its directory rather than onto "..".
+LABELS_CACHE_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cached_labels.json")
 
 def check(name, passed, detail="", warn_only=False):
     if not passed and warn_only:
@@ -574,6 +577,61 @@ def _build_sig(error_message, stack_trace):
     if not ok:
         failures += 1
 
+    # 16h. Every failure's suggested labels are valid labels on the dotnet/runtime repo
+    # Otherwise, update_github will fail to file the issue
+
+    total += 1
+    all_label_names = set()
+    try:
+        # TODO: If dotnet/runtime ever has more than 1000 labels, increase this page limit. At time of script authoring
+        # we have around 300 so this is more than enough.
+        for page_index in range(1, 11):
+            req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={
+                "Accept": "application/vnd.github+json",
+                "User-Agent": "ci-pipeline-monitor-validator",
+            })
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                data = json.loads(resp.read())
+            page_label_names = set([item["name"] for item in data])
+            # Don't bother querying additional empty pages after this one.
+            if len(page_label_names) <= 0:
+                break
+
+            for name in page_label_names:
+                all_label_names.add(name.lower())
+
+        with open(LABELS_CACHE_FILE, "w") as f:
+            f.write(json.dumps(sorted(all_label_names)))
+    except (urllib.error.URLError, TimeoutError):
+        print(" [WARN] Failed to fetch labels list from GitHub due to an HTTP or network error. Loading cached labels list.")
+        with open(LABELS_CACHE_FILE) as f:
+            labels_json = f.read()
+        labels_list = json.loads(labels_json)
+        all_label_names = set(labels_list)
+
+    all_failure_rows = conn.execute("""
+        SELECT id, labels FROM failures
+    """).fetchall()
+    bad_labels = []
+    for failure_row in all_failure_rows:
+        raw_labels = failure_row["labels"]
+        failure_labels = raw_labels.split(',') if raw_labels else []
+        for failure_label in failure_labels:
+            failure_label = failure_label.strip().lower()
+            if not failure_label:
+                continue
+            if not (failure_label in all_label_names):
+                print(f" [FAIL] Invalid label '{failure_label}' for failure {failure_row['id']}")
+                bad_labels.append(failure_row)
+
+    ok = check("All failure labels are valid labels from dotnet/runtime repo",
+               len(bad_labels) == 0,
+               f"{len(bad_labels)} label(s) were invalid (see above)" if len(bad_labels) else "")
+    if not ok:
+        print("Full set of valid labels follows:")
+        print(", ".join(sorted(all_label_names)))
+        failures += 1
+
     # Report checks (only if --report provided)
     if args.report:
         print("\n=== Report Sanity ===")
@@ -736,10 +794,10 @@ def _build_sig(error_message, stack_trace):
     warn_str = f", {warnings} warnings" if warnings else ""
     print(f"Results: {passed}/{total} passed, {failures} failed{warn_str}")
     if failures:
-        print("❌ VALIDATION FAILED — fix issues before publishing report")
+        print("/!\\ VALIDATION FAILED — fix issues before publishing report /!\\")
         sys.exit(1)
     else:
-        print("✅ ALL CHECKS PASSED")
+        print("ALL CHECKS PASSED")
         sys.exit(0)
 
 