|
10 | 10 | from copy import copy |
11 | 11 | from enum import Enum |
12 | 12 | from pathlib import Path |
13 | | -from typing import cast |
| 13 | +from typing import Any, cast |
14 | 14 |
|
15 | 15 | import requests |
| 16 | +import yaml |
16 | 17 | from pydantic import BaseModel, Field |
| 18 | +from typing_extensions import Self |
17 | 19 |
|
18 | 20 | from airbyte import exceptions as exc |
19 | 21 | from airbyte._registry_utils import fetch_registry_version_date, parse_changelog_html |
|
38 | 40 | _PYTHON_LANGUAGE_TAG = f"language:{_PYTHON_LANGUAGE}" |
39 | 41 | _MANIFEST_ONLY_TAG = f"language:{_MANIFEST_ONLY_LANGUAGE}" |
40 | 42 |
|
# URL template for a connector's published manifest.yaml file. The
# `{source_name}` and `{version}` placeholders are filled in with `str.format`.
_DEFAULT_MANIFEST_URL = (
    "https://connectors.airbyte.com/files/metadata/airbyte/{source_name}/{version}/manifest.yaml"
)
| 46 | + |
41 | 47 |
|
42 | 48 | class InstallType(str, Enum): |
43 | 49 | """The type of installation for a connector.""" |
@@ -294,6 +300,180 @@ class ConnectorVersionInfo(BaseModel): |
294 | 300 | parsing_errors: list[str] = Field(default_factory=list) |
295 | 301 |
|
296 | 302 |
|
class ApiDocsUrl(BaseModel):
    """A single documentation link associated with a connector.

    `doc_type` and `requires_login` may be supplied either under their Python
    field names or under their aliases (`type` and `requiresLogin`).
    """

    title: str
    url: str
    source: str
    doc_type: str = Field(default="other", alias="type")
    requires_login: bool = Field(default=False, alias="requiresLogin")

    # Accept the Python field names in addition to the aliases declared above.
    model_config = {"populate_by_name": True}

    @classmethod
    def from_manifest_dict(cls, manifest_data: dict[str, Any]) -> list[Self]:
        """Build documentation URL objects from parsed manifest data.

        Only the `data.externalDocumentationUrls` list is read. Every object
        produced is tagged with `source="data_external_docs"`.

        Args:
            manifest_data: The parsed manifest.yaml data as a dictionary

        Returns:
            One ApiDocsUrl per entry in `data.externalDocumentationUrls`, or an
            empty list when `data` is not a dict or the entry list is not a list.

        Raises:
            KeyError: If an entry has no `title` or `url` key.
        """
        data_section = manifest_data.get("data")
        if not isinstance(data_section, dict):
            return []

        external_docs = data_section.get("externalDocumentationUrls")
        if not isinstance(external_docs, list):
            return []

        return [
            cls(
                title=entry["title"],
                url=entry["url"],
                source="data_external_docs",
                doc_type=entry.get("type", "other"),
                requires_login=entry.get("requiresLogin", False),
            )
            for entry in external_docs
        ]
| 342 | + |
| 343 | + |
def _manifest_url_for(connector_name: str, *, version: str = "latest") -> str:
    """Get the expected URL of the manifest.yaml file for a connector.

    Args:
        connector_name: The canonical connector name (e.g., "source-facebook-marketing")
        version: The version segment to place in the URL. Defaults to "latest",
            which is the value this function always used before the parameter
            existed.

    Returns:
        The URL to the connector's manifest.yaml file
    """
    # The template names its first placeholder `source_name`, but it is filled
    # with whatever connector name the caller passes.
    return _DEFAULT_MANIFEST_URL.format(
        source_name=connector_name,
        version=version,
    )
| 357 | + |
| 358 | + |
def _fetch_manifest_dict(url: str) -> dict[str, Any]:
    """Fetch and parse a manifest.yaml file from a URL.

    Args:
        url: The URL to fetch the manifest from

    Returns:
        The parsed manifest data as a dictionary. An empty dict is returned when
        the manifest is not found (404), when the document is empty, or when its
        top-level YAML node is not a mapping.

    Raises:
        HTTPError: If the request fails with a non-404 status code
        yaml.YAMLError: If the response body is not valid YAML
    """
    http_not_found = 404

    response = requests.get(url, timeout=10)
    if response.status_code == http_not_found:
        # A missing manifest is reported as empty data rather than as an error.
        return {}

    response.raise_for_status()

    parsed = yaml.safe_load(response.text)
    # `safe_load` returns whatever the top-level node is: None for an empty
    # document, but also a list or scalar for a malformed manifest. Only a
    # mapping satisfies the declared return type, so anything else is treated
    # as "no manifest data" instead of being passed on to callers that use `.get`.
    if not isinstance(parsed, dict):
        return {}
    return parsed
| 379 | + |
| 380 | + |
def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:
    """Collect documentation URLs for a connector from the registry JSON.

    The registry is downloaded on every call. The connector is located by
    matching the end of each entry's `dockerRepository` against
    `/<connector_name>`, searching sources first and then destinations.

    Args:
        connector_name: The canonical connector name (e.g., "source-facebook-marketing")

    Returns:
        The entry's `documentationUrl` (if present) followed by its
        `externalDocumentationUrls` entries (if that value is a list). Empty
        when no registry entry matches.

    Raises:
        HTTPError: If the registry request returns an error status.
        KeyError: If an external documentation entry has no `title` or `url` key.
    """
    response = requests.get(_get_registry_url(), timeout=10)
    response.raise_for_status()
    registry_data = response.json()

    candidates = registry_data.get("sources", []) + registry_data.get("destinations", [])
    repo_suffix = f"/{connector_name}"
    connector_entry = next(
        (
            candidate
            for candidate in candidates
            if candidate.get("dockerRepository", "").endswith(repo_suffix)
        ),
        None,
    )
    if not connector_entry:
        return []

    found: list[ApiDocsUrl] = []

    if "documentationUrl" in connector_entry:
        airbyte_docs = ApiDocsUrl(
            title="Airbyte Documentation",
            url=connector_entry["documentationUrl"],
            source="registry",
        )
        found.append(airbyte_docs)

    # A missing key yields None here, which fails the list check just like an
    # unexpected non-list value does.
    external_docs = connector_entry.get("externalDocumentationUrls")
    if isinstance(external_docs, list):
        found.extend(
            ApiDocsUrl(
                title=doc["title"],
                url=doc["url"],
                source="registry_external_docs",
                doc_type=doc.get("type", "other"),
                requires_login=doc.get("requiresLogin", False),
            )
            for doc in external_docs
        )

    return found
| 430 | + |
| 431 | + |
def get_connector_api_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
    """Get API documentation URLs for a connector.

    URLs are gathered from two places, in this order:
    - Registry metadata (documentationUrl, externalDocumentationUrls)
    - Connector manifest.yaml file (data.externalDocumentationUrls)

    When the same URL appears more than once, only its first occurrence is
    kept, so a registry entry wins over a manifest entry with the same URL.

    Args:
        connector_name: The canonical connector name (e.g., "source-facebook-marketing")

    Returns:
        List of ApiDocsUrl objects with documentation URLs, deduplicated by URL.

    Raises:
        AirbyteConnectorNotRegisteredError: If the connector is not found in the registry.
    """
    if connector_name not in get_available_connectors(InstallType.DOCKER):
        raise exc.AirbyteConnectorNotRegisteredError(
            connector_name=connector_name,
            context={
                "registry_url": _get_registry_url(),
                "available_connectors": get_available_connectors(InstallType.DOCKER),
            },
        )

    registry_docs = _extract_docs_from_registry(connector_name)
    manifest_docs = ApiDocsUrl.from_manifest_dict(
        _fetch_manifest_dict(_manifest_url_for(connector_name)),
    )

    # `setdefault` only stores the first object seen for each URL, and dicts
    # preserve insertion order, so the result keeps first-seen ordering.
    first_by_url: dict[str, ApiDocsUrl] = {}
    for doc in [*registry_docs, *manifest_docs]:
        first_by_url.setdefault(doc.url, doc)

    return list(first_by_url.values())
| 475 | + |
| 476 | + |
297 | 477 | def get_connector_version_history( |
298 | 478 | connector_name: str, |
299 | 479 | *, |
|
0 commit comments