Skip to content

Commit 782b1f2

Browse files
feat(mcp): Add get_connector_version_history tool to registry domain (#857)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
1 parent 813d0d0 commit 782b1f2

File tree

14 files changed

+608
-297
lines changed

14 files changed

+608
-297
lines changed

airbyte/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,18 +125,18 @@
125125

126126
from typing import TYPE_CHECKING
127127

128+
from airbyte import registry
128129
from airbyte.caches.bigquery import BigQueryCache
129130
from airbyte.caches.duckdb import DuckDBCache
130131
from airbyte.caches.util import get_colab_cache, get_default_cache, new_local_cache
131132
from airbyte.datasets import CachedDataset
132133
from airbyte.destinations.base import Destination
133134
from airbyte.destinations.util import get_destination
134135
from airbyte.records import StreamRecord
136+
from airbyte.registry import get_available_connectors
135137
from airbyte.results import ReadResult, WriteResult
136138
from airbyte.secrets import SecretSourceEnum, get_secret
137-
from airbyte.sources import registry
138139
from airbyte.sources.base import Source
139-
from airbyte.sources.registry import get_available_connectors
140140
from airbyte.sources.util import get_source
141141

142142

airbyte/_executors/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
if TYPE_CHECKING:
1515
from collections.abc import Generator, Iterable, Iterator
1616

17-
from airbyte.sources.registry import ConnectorMetadata
17+
from airbyte.registry import ConnectorMetadata
1818

1919

2020
_LATEST_VERSION = "latest"

airbyte/_executors/noop.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import IO
1818

1919
from airbyte._message_iterators import AirbyteMessageIterator
20-
from airbyte.sources.registry import ConnectorMetadata
20+
from airbyte.registry import ConnectorMetadata
2121

2222

2323
logger = logging.getLogger("airbyte")

airbyte/_executors/python.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
if TYPE_CHECKING:
24-
from airbyte.sources.registry import ConnectorMetadata
24+
from airbyte.registry import ConnectorMetadata
2525

2626

2727
class VenvExecutor(Executor):

airbyte/_executors/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from airbyte._util.meta import which
2121
from airbyte._util.telemetry import EventState, log_install_state # Non-public API
2222
from airbyte.constants import AIRBYTE_OFFLINE_MODE, DEFAULT_PROJECT_DIR, TEMP_DIR_OVERRIDE
23-
from airbyte.sources.registry import ConnectorMetadata, InstallType, get_connector_metadata
23+
from airbyte.registry import ConnectorMetadata, InstallType, get_connector_metadata
2424
from airbyte.version import get_version
2525

2626

airbyte/_registry_utils.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
"""@private Utility functions for working with the Airbyte connector registry."""
3+
4+
import logging
5+
import re
6+
7+
import requests
8+
9+
10+
logger = logging.getLogger("airbyte.registry")
11+
12+
13+
def parse_changelog_html( # noqa: PLR0914
14+
html_content: str, connector_name: str
15+
) -> list[dict[str, str | list[str] | None]]:
16+
"""Parse changelog HTML to extract version history.
17+
18+
Returns a list of dicts with keys: version, release_date, docker_image_url,
19+
changelog_url, pr_url, pr_title, parsing_errors.
20+
"""
21+
versions: list[dict[str, str | list[str] | None]] = []
22+
23+
connector_type = "sources" if connector_name.startswith("source-") else "destinations"
24+
connector_short_name = connector_name.replace("source-", "").replace("destination-", "")
25+
26+
changelog_url = (
27+
f"https://docs.airbyte.com/integrations/{connector_type}/{connector_short_name}#changelog"
28+
)
29+
30+
row_pattern = re.compile(
31+
r"<tr><td[^>]*>([^<]+)<td[^>]*>([^<]+)<td[^>]*>(.*?)<td[^>]*>(.*?)<tr>", re.DOTALL
32+
)
33+
34+
pr_pattern = re.compile(
35+
r"<a href=https://github\.com/airbytehq/airbyte/pull/(\d+)[^>]*>(\d+)</a>"
36+
)
37+
38+
for match in row_pattern.finditer(html_content):
39+
version = match.group(1).strip()
40+
date = match.group(2).strip()
41+
pr_cell = match.group(3)
42+
subject = match.group(4).strip()
43+
44+
if not re.match(r"\d{4}-\d{2}-\d{2}", date):
45+
continue
46+
47+
pr_matches = list(pr_pattern.finditer(pr_cell))
48+
pr_url = None
49+
pr_title = None
50+
parsing_errors = []
51+
52+
if pr_matches:
53+
first_pr = pr_matches[0]
54+
pr_number = first_pr.group(1)
55+
pr_url = f"https://github.com/airbytehq/airbyte/pull/{pr_number}"
56+
57+
pr_title = re.sub(r"<[^>]+>", "", subject)
58+
pr_title = pr_title.replace("&quot;", '"').replace("&amp;", "&")
59+
pr_title = pr_title.replace("&lt;", "<").replace("&gt;", ">")
60+
pr_title = pr_title.strip()
61+
62+
if len(pr_matches) > 1:
63+
parsing_errors.append(
64+
f"Multiple PRs found for version {version}, using first PR: {pr_number}"
65+
)
66+
else:
67+
parsing_errors.append(f"No PR link found in changelog for version {version}")
68+
69+
docker_image_url = f"https://hub.docker.com/r/airbyte/{connector_name}/tags?name={version}"
70+
71+
versions.append(
72+
{
73+
"version": version,
74+
"release_date": date or None,
75+
"docker_image_url": docker_image_url,
76+
"changelog_url": changelog_url,
77+
"pr_url": pr_url,
78+
"pr_title": pr_title,
79+
"parsing_errors": parsing_errors,
80+
}
81+
)
82+
83+
return versions
84+
85+
86+
def fetch_registry_version_date(connector_name: str, version: str) -> str | None:
    """Look up the release date of one connector version in the OSS registry.

    Downloads the OSS registry JSON, picks the first source or destination
    entry whose ``dockerRepository`` equals ``airbyte/<connector_name>``, and
    reads the git commit timestamp stored for ``version`` under that entry's
    ``releases.releaseCandidates`` mapping.

    Returns:
        The leading ``YYYY-MM-DD`` part of the commit timestamp, or ``None``
        when the connector, the version, or a usable timestamp is missing.
        Any exception (network, JSON, unexpected shape) is logged at debug
        level and also yields ``None``; this lookup is best-effort.
    """
    try:
        response = requests.get(
            "https://connectors.airbyte.com/files/registries/v0/oss_registry.json",
            timeout=10,
        )
        response.raise_for_status()
        payload = response.json()

        entries = payload.get("sources", []) + payload.get("destinations", [])
        wanted_repo = f"airbyte/{connector_name}"

        # Only the first entry with a matching repository is ever inspected.
        entry = next(
            (item for item in entries if item.get("dockerRepository", "") == wanted_repo),
            None,
        )
        if entry is None:
            return None

        candidates = entry.get("releases", {}).get("releaseCandidates", {})
        if version not in candidates:
            return None

        git_meta = candidates[version].get("generated", {}).get("git", {})
        timestamp = git_meta.get("commit_timestamp")
        if not timestamp:
            return None

        leading_date = re.match(r"(\d{4}-\d{2}-\d{2})", timestamp)
        return leading_date.group(1) if leading_date else None
    except Exception as e:
        logger.debug(f"Failed to fetch registry date for {connector_name} v{version}: {e}")
        return None

airbyte/_util/registry_spec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import requests
1111

1212
from airbyte import exceptions as exc
13-
from airbyte.sources.registry import get_connector_metadata
13+
from airbyte.registry import get_connector_metadata
1414
from airbyte.version import get_version
1515

1616

airbyte/mcp/connector_registry.py

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,32 @@
44
# Note: Deferred type evaluation must be avoided due to FastMCP/Pydantic needing
55
# types to be available at import time for tool registration.
66
import contextlib
7+
import logging
78
from typing import Annotated, Any, Literal
89

10+
import requests
911
from fastmcp import FastMCP
1012
from pydantic import BaseModel, Field
1113

14+
from airbyte import exceptions as exc
1215
from airbyte._executors.util import DEFAULT_MANIFEST_URL
1316
from airbyte._util.meta import is_docker_installed
1417
from airbyte.mcp._tool_utils import mcp_tool, register_tools
1518
from airbyte.mcp._util import resolve_list_of_strings
16-
from airbyte.sources import get_available_connectors
17-
from airbyte.sources.registry import ConnectorMetadata, InstallType, get_connector_metadata
19+
from airbyte.registry import (
20+
ConnectorMetadata,
21+
ConnectorVersionInfo,
22+
InstallType,
23+
get_available_connectors,
24+
get_connector_metadata,
25+
)
26+
from airbyte.registry import get_connector_version_history as _get_connector_version_history
1827
from airbyte.sources.util import get_source
1928

2029

30+
logger = logging.getLogger("airbyte.mcp")
31+
32+
2133
@mcp_tool(
2234
domain="registry",
2335
read_only=True,
@@ -161,6 +173,71 @@ def get_connector_info(
161173
)
162174

163175

176+
@mcp_tool(
    domain="registry",
    read_only=True,
    idempotent=True,
)
def get_connector_version_history(
    connector_name: Annotated[
        str,
        Field(
            description="The name of the connector (e.g., 'source-faker', 'destination-postgres')"
        ),
    ],
    num_versions_to_validate: Annotated[
        int,
        Field(
            description=(
                "Number of most recent versions to validate with registry data for accurate "
                "release dates. Defaults to 5."
            ),
            default=5,
        ),
    ] = 5,
    limit: Annotated[
        int | None,
        Field(
            description=(
                "DEPRECATED: Use num_versions_to_validate instead. "
                "Maximum number of versions to return (most recent first). "
                "If specified, only the first N versions will be returned."
            ),
            default=None,
        ),
    ] = None,
) -> list[ConnectorVersionInfo] | Literal["Connector not found.", "Failed to fetch changelog."]:
    """Get version history for a connector.

    This tool retrieves the version history for a connector, including:
    - Version number
    - Release date (from changelog, with registry override for recent versions)
    - DockerHub URL for the version
    - Changelog URL
    - PR URL and title (scraped from changelog)

    For the most recent N versions (default 5), release dates are fetched from the
    registry for accuracy. For older versions, changelog dates are used.

    Returns:
        List of version information, sorted by most recent first.
    """
    # NOTE(review): the docstring above is left verbatim because it may be
    # surfaced to MCP clients as the tool description - confirm before editing.
    try:
        history = _get_connector_version_history(
            connector_name=connector_name,
            num_versions_to_validate=num_versions_to_validate,
        )
    except exc.AirbyteConnectorNotRegisteredError:
        # Known failure modes are reported as plain strings, not raised.
        return "Connector not found."
    except requests.exceptions.RequestException:
        logger.exception(f"Failed to fetch changelog for {connector_name}")
        return "Failed to fetch changelog."

    # A missing, zero, or negative `limit` means "return everything".
    if limit is None or limit <= 0:
        return history
    return history[:limit]
240+
164241
def register_connector_registry_tools(app: FastMCP) -> None:
165242
"""@private Register tools with the FastMCP app.
166243

airbyte/mcp/local_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
from airbyte.caches.util import get_default_cache
1616
from airbyte.mcp._tool_utils import mcp_tool, register_tools
1717
from airbyte.mcp._util import resolve_config, resolve_list_of_strings
18+
from airbyte.registry import get_connector_metadata
1819
from airbyte.secrets.config import _get_secret_sources
1920
from airbyte.secrets.env_vars import DotenvSecretManager
2021
from airbyte.secrets.google_gsm import GoogleGSMSecretManager
2122
from airbyte.sources.base import Source
22-
from airbyte.sources.registry import get_connector_metadata
2323

2424

2525
if TYPE_CHECKING:

0 commit comments

Comments
 (0)