Skip to content

Commit 72d2555

Browse files
committed
fix: use pydantic models for create index payloads
1 parent 09a64c3 commit 72d2555

File tree

3 files changed

+335
-307
lines changed

3 files changed

+335
-307
lines changed

src/uipath/_services/context_grounding_service.py

Lines changed: 109 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
from typing import Any, Dict, List, Optional, Tuple, Union
32

43
import httpx
@@ -10,16 +9,26 @@
109
from .._folder_context import FolderContext
1110
from .._utils import Endpoint, RequestSpec, header_folder, infer_bindings
1211
from .._utils.constants import (
13-
CONFLUENCE_DATA_SOURCE,
14-
DROPBOX_DATA_SOURCE,
15-
GOOGLE_DRIVE_DATA_SOURCE,
1612
LLMV4,
17-
ONEDRIVE_DATA_SOURCE,
18-
ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
1913
)
2014
from ..models import IngestionInProgressException
2115
from ..models.context_grounding import ContextGroundingQueryResponse
2216
from ..models.context_grounding_index import ContextGroundingIndex
17+
from ..models.context_grounding_payloads import (
18+
BucketDataSource,
19+
BucketSourceConfig,
20+
ConfluenceDataSource,
21+
ConfluenceSourceConfig,
22+
CreateIndexPayload,
23+
DropboxDataSource,
24+
DropboxSourceConfig,
25+
GoogleDriveDataSource,
26+
GoogleDriveSourceConfig,
27+
OneDriveDataSource,
28+
OneDriveSourceConfig,
29+
PreProcessing,
30+
SourceConfig,
31+
)
2332
from ..tracing._traced import traced
2433
from ._base_service import BaseService
2534
from .buckets_service import BucketsService
@@ -323,7 +332,7 @@ async def retrieve_by_id_async(
323332
def create_index(
324333
self,
325334
name: str,
326-
source: Dict[str, Any],
335+
source: SourceConfig,
327336
description: Optional[str] = None,
328337
cron_expression: Optional[str] = None,
329338
time_zone_id: Optional[str] = None,
@@ -336,12 +345,12 @@ def create_index(
336345
337346
Args:
338347
name (str): The name of the index to create.
339-
source (dict): Source configuration dictionary:
340-
- For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
341-
- For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
342-
- For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
343-
- For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
344-
- For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
348+
source (SourceConfig): Source configuration using one of:
349+
- BucketSourceConfig: For storage buckets
350+
- GoogleDriveSourceConfig: For Google Drive
351+
- DropboxSourceConfig: For Dropbox
352+
- OneDriveSourceConfig: For OneDrive
353+
- ConfluenceSourceConfig: For Confluence
345354
description (Optional[str]): Description of the index.
346355
cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
347356
time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
@@ -370,7 +379,7 @@ def create_index(
370379
response = self.request(
371380
spec.method,
372381
spec.endpoint,
373-
content=spec.content,
382+
json=spec.json,
374383
headers=spec.headers,
375384
)
376385

@@ -381,7 +390,7 @@ def create_index(
381390
async def create_index_async(
382391
self,
383392
name: str,
384-
source: Dict[str, Any],
393+
source: SourceConfig,
385394
description: Optional[str] = None,
386395
cron_expression: Optional[str] = None,
387396
time_zone_id: Optional[str] = None,
@@ -394,12 +403,12 @@ async def create_index_async(
394403
395404
Args:
396405
name (str): The name of the index to create.
397-
source (dict): Source configuration dictionary:
398-
- For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
399-
- For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
400-
- For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
401-
- For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
402-
- For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
406+
source (SourceConfig): Source configuration using one of:
407+
- BucketSourceConfig: For storage buckets
408+
- GoogleDriveSourceConfig: For Google Drive
409+
- DropboxSourceConfig: For Dropbox
410+
- OneDriveSourceConfig: For OneDrive
411+
- ConfluenceSourceConfig: For Confluence
403412
description (Optional[str]): Description of the index.
404413
cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
405414
time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
@@ -428,7 +437,7 @@ async def create_index_async(
428437
response = await self.request_async(
429438
spec.method,
430439
spec.endpoint,
431-
content=spec.content,
440+
json=spec.json,
432441
headers=spec.headers,
433442
)
434443

@@ -697,7 +706,7 @@ def _create_spec(
697706
self,
698707
name: str,
699708
description: Optional[str],
700-
source: Dict[str, Any],
709+
source: SourceConfig,
701710
advanced_ingestion: bool,
702711
preprocessing_request: str,
703712
cron_expression: Optional[str] = None,
@@ -710,7 +719,7 @@ def _create_spec(
710719
Args:
711720
name: Index name
712721
description: Index description
713-
source: Source configuration dictionary
722+
source: Source configuration (typed model)
714723
cron_expression: Optional cron expression for scheduled indexing
715724
time_zone_id: Optional timezone for cron expression
716725
advanced_ingestion: Whether to enable advanced ingestion with preprocessing
@@ -721,175 +730,103 @@ def _create_spec(
721730
Returns:
722731
RequestSpec for the create index request
723732
"""
724-
source_type = source.get("type", "").lower()
725-
726733
folder_key = self._resolve_folder_key(folder_key, folder_path)
727-
file_type = source.get("file_type")
728-
file_name_glob = f"**/*.{file_type}" if file_type else "**/*"
729734

730-
data_source = self._build_data_source(source_type, source, file_name_glob)
735+
data_source_dict = self._build_data_source(source)
731736

732737
if cron_expression:
733-
data_source["indexer"] = {
738+
data_source_dict["indexer"] = {
734739
"cronExpression": cron_expression,
735740
"timeZoneId": time_zone_id or "UTC",
736741
}
737742

738-
payload = {
739-
"name": name,
740-
"description": description or "",
741-
"dataSource": data_source,
742-
}
743-
744-
if advanced_ingestion and preprocessing_request:
745-
payload["preProcessing"] = {
746-
"@odata.type": preprocessing_request,
747-
}
743+
payload = CreateIndexPayload(
744+
name=name,
745+
description=description or "",
746+
data_source=data_source_dict,
747+
pre_processing=(
748+
PreProcessing(**{"@odata.type": preprocessing_request})
749+
if advanced_ingestion and preprocessing_request
750+
else None
751+
),
752+
)
748753

749754
return RequestSpec(
750755
method="POST",
751756
endpoint=Endpoint("/ecs_/v2/indexes/create"),
752-
content=json.dumps(payload),
757+
json=payload.model_dump(by_alias=True, exclude_none=True),
753758
headers={
754759
**header_folder(folder_key, None),
755-
"Content-Type": "application/json",
756760
},
757761
)
758762

759-
def _build_data_source(
760-
self, source_type: str, source: Dict[str, Any], file_name_glob: str
761-
) -> Dict[str, Any]:
762-
"""Build data source configuration based on type."""
763-
if source_type == "bucket":
764-
return self._build_bucket_data_source(source, file_name_glob)
765-
elif source_type in ["google_drive"]:
766-
return self._build_google_drive_data_source(source, file_name_glob)
767-
elif source_type == "dropbox":
768-
return self._build_dropbox_data_source(source, file_name_glob)
769-
elif source_type == "onedrive":
770-
return self._build_onedrive_data_source(source, file_name_glob)
771-
elif source_type == "confluence":
772-
return self._build_confluence_data_source(source, file_name_glob)
763+
def _build_data_source(self, source: SourceConfig) -> Dict[str, Any]:
764+
"""Build data source configuration from typed source config.
765+
766+
Args:
767+
source: Typed source configuration model
768+
769+
Returns:
770+
Dictionary with data source configuration for API
771+
"""
772+
file_name_glob = f"**/*.{source.file_type}" if source.file_type else "**/*"
773+
774+
data_source: Union[
775+
BucketDataSource,
776+
GoogleDriveDataSource,
777+
DropboxDataSource,
778+
OneDriveDataSource,
779+
ConfluenceDataSource,
780+
]
781+
782+
if isinstance(source, BucketSourceConfig):
783+
data_source = BucketDataSource(
784+
folder=source.folder_path,
785+
bucketName=source.bucket_name,
786+
fileNameGlob=file_name_glob,
787+
directoryPath=source.directory_path,
788+
)
789+
elif isinstance(source, GoogleDriveSourceConfig):
790+
data_source = GoogleDriveDataSource(
791+
folder=source.folder_path,
792+
connectionId=source.connection_id,
793+
connectionName=source.connection_name,
794+
leafFolderId=source.leaf_folder_id,
795+
directoryPath=source.directory_path,
796+
fileNameGlob=file_name_glob,
797+
)
798+
elif isinstance(source, DropboxSourceConfig):
799+
data_source = DropboxDataSource(
800+
folder=source.folder_path,
801+
connectionId=source.connection_id,
802+
connectionName=source.connection_name,
803+
directoryPath=source.directory_path,
804+
fileNameGlob=file_name_glob,
805+
)
806+
elif isinstance(source, OneDriveSourceConfig):
807+
data_source = OneDriveDataSource(
808+
folder=source.folder_path,
809+
connectionId=source.connection_id,
810+
connectionName=source.connection_name,
811+
leafFolderId=source.leaf_folder_id,
812+
directoryPath=source.directory_path,
813+
fileNameGlob=file_name_glob,
814+
)
815+
elif isinstance(source, ConfluenceSourceConfig):
816+
data_source = ConfluenceDataSource(
817+
folder=source.folder_path,
818+
connectionId=source.connection_id,
819+
connectionName=source.connection_name,
820+
directoryPath=source.directory_path,
821+
fileNameGlob=file_name_glob,
822+
spaceId=source.space_id,
823+
)
773824
else:
774825
raise ValueError(
775-
f"Unsupported data source type: {source_type}. "
776-
f"Supported types: bucket, google_drive, dropbox, onedrive, confluence"
826+
f"Unsupported source configuration type: {type(source).__name__}"
777827
)
778828

779-
def _build_bucket_data_source(
780-
self, source: Dict[str, Any], file_name_glob: str
781-
) -> Dict[str, Any]:
782-
"""Build data source configuration for storage bucket."""
783-
required_fields = ["bucket_name", "folder_path"]
784-
for field in required_fields:
785-
if not source.get(field):
786-
raise ValueError(f"{field} is required for bucket data source")
787-
788-
return {
789-
"@odata.type": ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE,
790-
"folder": source["folder_path"],
791-
"bucketName": source["bucket_name"],
792-
"fileNameGlob": file_name_glob,
793-
"directoryPath": source.get("directory_path", "/"),
794-
}
795-
796-
def _build_google_drive_data_source(
797-
self, source: Dict[str, Any], file_name_glob: str
798-
) -> Dict[str, Any]:
799-
"""Build data source configuration for Google Drive."""
800-
required_fields = [
801-
"connection_id",
802-
"connection_name",
803-
"leaf_folder_id",
804-
"directory_path",
805-
"folder_path",
806-
]
807-
for field in required_fields:
808-
if not source.get(field):
809-
raise ValueError(f"{field} is required for Google Drive data source")
810-
811-
return {
812-
"@odata.type": GOOGLE_DRIVE_DATA_SOURCE,
813-
"folder": source["folder_path"],
814-
"connectionId": source["connection_id"],
815-
"connectionName": source["connection_name"],
816-
"leafFolderId": source["leaf_folder_id"],
817-
"directoryPath": source["directory_path"],
818-
"fileNameGlob": file_name_glob,
819-
}
820-
821-
def _build_dropbox_data_source(
822-
self, source: Dict[str, Any], file_name_glob: str
823-
) -> Dict[str, Any]:
824-
"""Build data source configuration for Dropbox."""
825-
required_fields = [
826-
"connection_id",
827-
"connection_name",
828-
"directory_path",
829-
"folder_path",
830-
]
831-
for field in required_fields:
832-
if not source.get(field):
833-
raise ValueError(f"{field} is required for Dropbox data source")
834-
835-
return {
836-
"@odata.type": DROPBOX_DATA_SOURCE,
837-
"folder": source["folder_path"],
838-
"connectionId": source["connection_id"],
839-
"connectionName": source["connection_name"],
840-
"directoryPath": source["directory_path"],
841-
"fileNameGlob": file_name_glob,
842-
}
843-
844-
def _build_onedrive_data_source(
845-
self, source: Dict[str, Any], file_name_glob: str
846-
) -> Dict[str, Any]:
847-
"""Build data source configuration for OneDrive."""
848-
required_fields = [
849-
"connection_id",
850-
"connection_name",
851-
"leaf_folder_id",
852-
"directory_path",
853-
"folder_path",
854-
]
855-
for field in required_fields:
856-
if not source.get(field):
857-
raise ValueError(f"{field} is required for OneDrive data source")
858-
859-
return {
860-
"@odata.type": ONEDRIVE_DATA_SOURCE,
861-
"folder": source["folder_path"],
862-
"connectionId": source["connection_id"],
863-
"connectionName": source["connection_name"],
864-
"leafFolderId": source["leaf_folder_id"],
865-
"directoryPath": source["directory_path"],
866-
"fileNameGlob": file_name_glob,
867-
}
868-
869-
def _build_confluence_data_source(
870-
self, source: Dict[str, Any], file_name_glob: str
871-
) -> Dict[str, Any]:
872-
"""Build data source configuration for Confluence."""
873-
required_fields = [
874-
"connection_id",
875-
"connection_name",
876-
"directory_path",
877-
"folder_path",
878-
"space_id",
879-
]
880-
for field in required_fields:
881-
if not source.get(field):
882-
raise ValueError(f"{field} is required for Confluence data source")
883-
884-
return {
885-
"@odata.type": CONFLUENCE_DATA_SOURCE,
886-
"folder": source["folder_path"],
887-
"connectionId": source["connection_id"],
888-
"connectionName": source["connection_name"],
889-
"directoryPath": source["directory_path"],
890-
"fileNameGlob": file_name_glob,
891-
"spaceId": source["space_id"],
892-
}
829+
return data_source.model_dump(by_alias=True, exclude_none=True)
893830

894831
def _retrieve_by_id_spec(
895832
self,

0 commit comments

Comments (0)