Skip to content

Commit 7eb746b

Browse files
feat(mcp): Add get_api_docs_urls tool with data.externalDocumentationUrls support (#858)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
1 parent 782b1f2 commit 7eb746b

File tree

3 files changed

+376
-3
lines changed

3 files changed

+376
-3
lines changed

airbyte/mcp/connector_registry.py

Lines changed: 32 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,17 @@
1212
from pydantic import BaseModel, Field
1313

1414
from airbyte import exceptions as exc
15-
from airbyte._executors.util import DEFAULT_MANIFEST_URL
1615
from airbyte._util.meta import is_docker_installed
1716
from airbyte.mcp._tool_utils import mcp_tool, register_tools
1817
from airbyte.mcp._util import resolve_list_of_strings
1918
from airbyte.registry import (
19+
_DEFAULT_MANIFEST_URL,
20+
ApiDocsUrl,
2021
ConnectorMetadata,
2122
ConnectorVersionInfo,
2223
InstallType,
2324
get_available_connectors,
25+
get_connector_api_docs_urls,
2426
get_connector_metadata,
2527
)
2628
from airbyte.registry import get_connector_version_history as _get_connector_version_history
@@ -159,7 +161,7 @@ def get_connector_info(
159161
connector.install()
160162
config_spec_jsonschema = connector.config_spec
161163

162-
manifest_url = DEFAULT_MANIFEST_URL.format(
164+
manifest_url = _DEFAULT_MANIFEST_URL.format(
163165
source_name=connector_name,
164166
version="latest",
165167
)
@@ -173,6 +175,34 @@ def get_connector_info(
173175
)
174176

175177

178+
@mcp_tool(
    domain="registry",
    read_only=True,
    idempotent=True,
)
def get_api_docs_urls(
    connector_name: Annotated[
        str,
        Field(
            description=(
                "The canonical connector name "
                "(e.g., 'source-facebook-marketing', 'destination-snowflake')"
            )
        ),
    ],
) -> list[ApiDocsUrl] | Literal["Connector not found."]:
    """Get API documentation URLs for a connector.

    This tool retrieves documentation URLs for a connector's upstream API from multiple sources:
    - Registry metadata (documentationUrl, externalDocumentationUrls)
    - Connector manifest.yaml file (data.externalDocumentationUrls)
    """
    # NOTE(review): the docstring above is kept verbatim because it is presumably
    # surfaced to clients as the tool description — confirm against `mcp_tool`.
    try:
        docs_urls = get_connector_api_docs_urls(connector_name)
    except exc.AirbyteConnectorNotRegisteredError:
        # An unknown connector is reported as a sentinel string instead of an error.
        return "Connector not found."
    else:
        return docs_urls
204+
205+
176206
@mcp_tool(
177207
domain="registry",
178208
read_only=True,

airbyte/registry.py

Lines changed: 181 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,12 @@
1010
from copy import copy
1111
from enum import Enum
1212
from pathlib import Path
13-
from typing import cast
13+
from typing import Any, cast
1414

1515
import requests
16+
import yaml
1617
from pydantic import BaseModel, Field
18+
from typing_extensions import Self
1719

1820
from airbyte import exceptions as exc
1921
from airbyte._registry_utils import fetch_registry_version_date, parse_changelog_html
@@ -38,6 +40,10 @@
3840
_PYTHON_LANGUAGE_TAG = f"language:{_PYTHON_LANGUAGE}"
3941
_MANIFEST_ONLY_TAG = f"language:{_MANIFEST_ONLY_LANGUAGE}"
4042

43+
# URL template for a connector's published manifest.yaml. The `{source_name}` and
# `{version}` placeholders are filled in with `str.format`.
_DEFAULT_MANIFEST_URL = (
    "https://connectors.airbyte.com/files/metadata/airbyte/"
    "{source_name}/{version}/manifest.yaml"
)
46+
4147

4248
class InstallType(str, Enum):
4349
"""The type of installation for a connector."""
@@ -294,6 +300,180 @@ class ConnectorVersionInfo(BaseModel):
294300
parsing_errors: list[str] = Field(default_factory=list)
295301

296302

303+
class ApiDocsUrl(BaseModel):
    """API documentation URL information.

    `doc_type` and `requires_login` can also be populated through their input
    aliases `type` and `requiresLogin`.
    """

    # Human-readable label for the link.
    title: str
    # The documentation URL itself.
    url: str
    # Where the link was found, e.g. "registry" or "data_external_docs".
    source: str
    doc_type: str = Field(default="other", alias="type")
    requires_login: bool = Field(default=False, alias="requiresLogin")

    # Accept both the Python field names and the aliases when constructing.
    model_config = {"populate_by_name": True}

    @classmethod
    def from_manifest_dict(cls, manifest_data: dict[str, Any]) -> list[Self]:
        """Extract documentation URLs from parsed manifest data.

        Only `data.externalDocumentationUrls` is read. Any unexpected shape (missing
        or non-dict `data`, a non-list URL collection, or an individual entry that is
        not a dict or lacks `title`/`url`) is skipped rather than raising, so one bad
        entry does not discard the remaining links.

        Args:
            manifest_data: The parsed manifest.yaml data as a dictionary

        Returns:
            List of ApiDocsUrl objects extracted from the manifest (possibly empty)
        """
        data_section = manifest_data.get("data")
        if not isinstance(data_section, dict):
            return []

        external_docs = data_section.get("externalDocumentationUrls")
        if not isinstance(external_docs, list):
            return []

        return [
            cls(
                title=doc["title"],
                url=doc["url"],
                source="data_external_docs",
                doc_type=doc.get("type", "other"),
                requires_login=doc.get("requiresLogin", False),
            )
            for doc in external_docs
            # Tolerate malformed entries the same way malformed sections are tolerated.
            if isinstance(doc, dict) and "title" in doc and "url" in doc
        ]
342+
343+
344+
def _manifest_url_for(connector_name: str) -> str:
    """Build the URL at which a connector's manifest.yaml is expected to be published.

    The URL always targets the "latest" version of the connector.

    Args:
        connector_name: The canonical connector name (e.g., "source-facebook-marketing")

    Returns:
        The URL to the connector's manifest.yaml file
    """
    url_template = _DEFAULT_MANIFEST_URL
    return url_template.format(source_name=connector_name, version="latest")
357+
358+
359+
def _fetch_manifest_dict(url: str) -> dict[str, Any]:
    """Fetch and parse a manifest.yaml file from a URL.

    Args:
        url: The URL to fetch the manifest from

    Returns:
        The parsed manifest data as a dictionary. An empty dict is returned when the
        manifest is not found (404), when the document is empty, or when the YAML
        top level is not a mapping.

    Raises:
        HTTPError: If the request fails with a non-404 error status code
        yaml.YAMLError: If the response body is not valid YAML
    """
    http_not_found = 404

    response = requests.get(url, timeout=10)
    if response.status_code == http_not_found:
        # A missing manifest is treated as "no manifest data", not as an error.
        return {}

    response.raise_for_status()
    manifest = yaml.safe_load(response.text)
    # `safe_load` yields None for an empty document and may yield a list or scalar
    # for valid-but-unexpected YAML; only a mapping satisfies the return contract.
    return manifest if isinstance(manifest, dict) else {}
379+
380+
381+
def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:
    """Extract documentation URLs from connector registry metadata.

    Downloads the registry JSON, finds the first source or destination whose
    `dockerRepository` ends with `/<connector_name>`, and collects its
    `documentationUrl` plus any `externalDocumentationUrls` entries.

    Args:
        connector_name: The canonical connector name (e.g., "source-facebook-marketing")

    Returns:
        List of ApiDocsUrl objects extracted from the registry. Empty when the
        connector has no registry entry or the entry carries no usable links.

    Raises:
        HTTPError: If the registry request fails with an error status code
    """
    # NOTE(review): assumes `_get_registry_url()` always yields an HTTP(S) URL —
    # confirm it can never be a local file path.
    response = requests.get(_get_registry_url(), timeout=10)
    response.raise_for_status()
    registry_data = response.json()

    connector_list = registry_data.get("sources", []) + registry_data.get("destinations", [])
    repo_suffix = f"/{connector_name}"
    connector_entry = next(
        (
            entry
            for entry in connector_list
            # `or ""` guards against an explicit null `dockerRepository`.
            if (entry.get("dockerRepository") or "").endswith(repo_suffix)
        ),
        None,
    )
    if not connector_entry:
        return []

    docs_urls: list[ApiDocsUrl] = []

    documentation_url = connector_entry.get("documentationUrl")
    if documentation_url:
        docs_urls.append(
            ApiDocsUrl(
                title="Airbyte Documentation",
                url=documentation_url,
                source="registry",
            )
        )

    external_docs = connector_entry.get("externalDocumentationUrls")
    if isinstance(external_docs, list):
        docs_urls.extend(
            ApiDocsUrl(
                title=doc["title"],
                url=doc["url"],
                source="registry_external_docs",
                doc_type=doc.get("type", "other"),
                requires_login=doc.get("requiresLogin", False),
            )
            for doc in external_docs
            # Skip malformed entries instead of failing the whole lookup.
            if isinstance(doc, dict) and "title" in doc and "url" in doc
        )

    return docs_urls
430+
431+
432+
def get_connector_api_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
    """Get API documentation URLs for a connector.

    This function retrieves documentation URLs for a connector's upstream API from multiple sources:
    - Registry metadata (documentationUrl, externalDocumentationUrls)
    - Connector manifest.yaml file (data.externalDocumentationUrls)

    Registry links come first, followed by manifest links. When the same URL appears
    more than once, the first occurrence is kept.

    Args:
        connector_name: The canonical connector name (e.g., "source-facebook-marketing")

    Returns:
        List of ApiDocsUrl objects with documentation URLs, deduplicated by URL.

    Raises:
        AirbyteConnectorNotRegisteredError: If the connector is not found in the registry.
        HTTPError: If the registry request, or the manifest request with a non-404
            status, fails.
    """
    # Look the connector list up once; it is reused for the error context below.
    available_connectors = get_available_connectors(InstallType.DOCKER)
    if connector_name not in available_connectors:
        raise exc.AirbyteConnectorNotRegisteredError(
            connector_name=connector_name,
            context={
                "registry_url": _get_registry_url(),
                "available_connectors": available_connectors,
            },
        )

    manifest_data = _fetch_manifest_dict(_manifest_url_for(connector_name))
    docs_urls: list[ApiDocsUrl] = [
        *_extract_docs_from_registry(connector_name),
        *ApiDocsUrl.from_manifest_dict(manifest_data),
    ]

    # Dicts preserve insertion order and `setdefault` keeps the first entry per URL.
    unique_by_url: dict[str, ApiDocsUrl] = {}
    for doc_url in docs_urls:
        unique_by_url.setdefault(doc_url.url, doc_url)

    return list(unique_by_url.values())
475+
476+
297477
def get_connector_version_history(
298478
connector_name: str,
299479
*,

0 commit comments

Comments
 (0)