|
| 1 | +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. |
| 2 | +"""@private Utility functions for working with the Airbyte connector registry.""" |
| 3 | + |
import html
import logging
import re

import requests
| 8 | + |
| 9 | + |
# Module-level logger shared by the registry utilities in this file.
logger = logging.getLogger("airbyte.registry")
| 11 | + |
| 12 | + |
| 13 | +def parse_changelog_html( # noqa: PLR0914 |
| 14 | + html_content: str, connector_name: str |
| 15 | +) -> list[dict[str, str | list[str] | None]]: |
| 16 | + """Parse changelog HTML to extract version history. |
| 17 | +
|
| 18 | + Returns a list of dicts with keys: version, release_date, docker_image_url, |
| 19 | + changelog_url, pr_url, pr_title, parsing_errors. |
| 20 | + """ |
| 21 | + versions: list[dict[str, str | list[str] | None]] = [] |
| 22 | + |
| 23 | + connector_type = "sources" if connector_name.startswith("source-") else "destinations" |
| 24 | + connector_short_name = connector_name.replace("source-", "").replace("destination-", "") |
| 25 | + |
| 26 | + changelog_url = ( |
| 27 | + f"https://docs.airbyte.com/integrations/{connector_type}/{connector_short_name}#changelog" |
| 28 | + ) |
| 29 | + |
| 30 | + row_pattern = re.compile( |
| 31 | + r"<tr><td[^>]*>([^<]+)<td[^>]*>([^<]+)<td[^>]*>(.*?)<td[^>]*>(.*?)<tr>", re.DOTALL |
| 32 | + ) |
| 33 | + |
| 34 | + pr_pattern = re.compile( |
| 35 | + r"<a href=https://github\.com/airbytehq/airbyte/pull/(\d+)[^>]*>(\d+)</a>" |
| 36 | + ) |
| 37 | + |
| 38 | + for match in row_pattern.finditer(html_content): |
| 39 | + version = match.group(1).strip() |
| 40 | + date = match.group(2).strip() |
| 41 | + pr_cell = match.group(3) |
| 42 | + subject = match.group(4).strip() |
| 43 | + |
| 44 | + if not re.match(r"\d{4}-\d{2}-\d{2}", date): |
| 45 | + continue |
| 46 | + |
| 47 | + pr_matches = list(pr_pattern.finditer(pr_cell)) |
| 48 | + pr_url = None |
| 49 | + pr_title = None |
| 50 | + parsing_errors = [] |
| 51 | + |
| 52 | + if pr_matches: |
| 53 | + first_pr = pr_matches[0] |
| 54 | + pr_number = first_pr.group(1) |
| 55 | + pr_url = f"https://github.com/airbytehq/airbyte/pull/{pr_number}" |
| 56 | + |
| 57 | + pr_title = re.sub(r"<[^>]+>", "", subject) |
| 58 | + pr_title = pr_title.replace(""", '"').replace("&", "&") |
| 59 | + pr_title = pr_title.replace("<", "<").replace(">", ">") |
| 60 | + pr_title = pr_title.strip() |
| 61 | + |
| 62 | + if len(pr_matches) > 1: |
| 63 | + parsing_errors.append( |
| 64 | + f"Multiple PRs found for version {version}, using first PR: {pr_number}" |
| 65 | + ) |
| 66 | + else: |
| 67 | + parsing_errors.append(f"No PR link found in changelog for version {version}") |
| 68 | + |
| 69 | + docker_image_url = f"https://hub.docker.com/r/airbyte/{connector_name}/tags?name={version}" |
| 70 | + |
| 71 | + versions.append( |
| 72 | + { |
| 73 | + "version": version, |
| 74 | + "release_date": date or None, |
| 75 | + "docker_image_url": docker_image_url, |
| 76 | + "changelog_url": changelog_url, |
| 77 | + "pr_url": pr_url, |
| 78 | + "pr_title": pr_title, |
| 79 | + "parsing_errors": parsing_errors, |
| 80 | + } |
| 81 | + ) |
| 82 | + |
| 83 | + return versions |
| 84 | + |
| 85 | + |
def fetch_registry_version_date(connector_name: str, version: str) -> str | None:
    """Look up the release date of a connector version in the OSS registry.

    Downloads the OSS registry JSON, takes the first source or destination whose
    ``dockerRepository`` equals ``airbyte/<connector_name>``, and reads the git
    commit timestamp stored for ``version`` under that connector's release
    candidates.

    The lookup is best-effort: any exception raised along the way (request
    failure, HTTP error status, unexpected JSON shape) is logged at debug level
    and reported as ``None``.

    Args:
        connector_name: Connector name without the ``airbyte/`` prefix.
        version: Version key to look up among the release candidates.

    Returns:
        The leading ``YYYY-MM-DD`` portion of the commit timestamp, or None when
        the connector, the version, or a usable timestamp is not found.
    """
    try:
        reply = requests.get(
            "https://connectors.airbyte.com/files/registries/v0/oss_registry.json",
            timeout=10,
        )
        reply.raise_for_status()
        payload = reply.json()

        entries = payload.get("sources", []) + payload.get("destinations", [])

        # Only the first connector with a matching repository is inspected.
        entry = next(
            (
                item
                for item in entries
                if item.get("dockerRepository", "") == f"airbyte/{connector_name}"
            ),
            None,
        )
        if entry is None:
            return None

        candidates = entry.get("releases", {}).get("releaseCandidates", {})
        if version not in candidates:
            return None

        timestamp = (
            candidates[version].get("generated", {}).get("git", {}).get("commit_timestamp")
        )
        if not timestamp:
            return None

        # Keep only the date part at the start of the timestamp.
        leading_date = re.match(r"(\d{4}-\d{2}-\d{2})", timestamp)
        return leading_date.group(1) if leading_date else None
    except Exception as e:
        logger.debug(f"Failed to fetch registry date for {connector_name} v{version}: {e}")
        return None
0 commit comments