1- import json
21from typing import Any , Dict , List , Optional , Tuple , Union
32
43import httpx
109from .._folder_context import FolderContext
1110from .._utils import Endpoint , RequestSpec , header_folder , infer_bindings
1211from .._utils .constants import (
13- CONFLUENCE_DATA_SOURCE ,
14- DROPBOX_DATA_SOURCE ,
15- GOOGLE_DRIVE_DATA_SOURCE ,
1612 LLMV4 ,
17- ONEDRIVE_DATA_SOURCE ,
18- ORCHESTRATOR_STORAGE_BUCKET_DATA_SOURCE ,
1913)
2014from ..models import IngestionInProgressException
2115from ..models .context_grounding import ContextGroundingQueryResponse
2216from ..models .context_grounding_index import ContextGroundingIndex
17+ from ..models .context_grounding_payloads import (
18+ BucketDataSource ,
19+ BucketSourceConfig ,
20+ ConfluenceDataSource ,
21+ ConfluenceSourceConfig ,
22+ CreateIndexPayload ,
23+ DropboxDataSource ,
24+ DropboxSourceConfig ,
25+ GoogleDriveDataSource ,
26+ GoogleDriveSourceConfig ,
27+ OneDriveDataSource ,
28+ OneDriveSourceConfig ,
29+ PreProcessing ,
30+ SourceConfig ,
31+ )
2332from ..tracing ._traced import traced
2433from ._base_service import BaseService
2534from .buckets_service import BucketsService
@@ -323,7 +332,7 @@ async def retrieve_by_id_async(
323332 def create_index (
324333 self ,
325334 name : str ,
326- source : Dict [ str , Any ] ,
335+ source : SourceConfig ,
327336 description : Optional [str ] = None ,
328337 cron_expression : Optional [str ] = None ,
329338 time_zone_id : Optional [str ] = None ,
@@ -336,12 +345,12 @@ def create_index(
336345
337346 Args:
338347 name (str): The name of the index to create.
339- source (dict ): Source configuration dictionary :
340- - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
341- - For Google Drive: type="google", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
342- - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
343- - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
344- - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
348+ source (SourceConfig ): Source configuration using one of :
349+ - BucketSourceConfig: For storage buckets
350+ - GoogleDriveSourceConfig: For Google Drive
351+ - DropboxSourceConfig: For Dropbox
352+ - OneDriveSourceConfig: For OneDrive
353+ - ConfluenceSourceConfig: For Confluence
345354 description (Optional[str]): Description of the index.
346355 cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
347356 time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
@@ -370,7 +379,7 @@ def create_index(
370379 response = self .request (
371380 spec .method ,
372381 spec .endpoint ,
373- content = spec .content ,
382+ json = spec .json ,
374383 headers = spec .headers ,
375384 )
376385
@@ -381,7 +390,7 @@ def create_index(
381390 async def create_index_async (
382391 self ,
383392 name : str ,
384- source : Dict [ str , Any ] ,
393+ source : SourceConfig ,
385394 description : Optional [str ] = None ,
386395 cron_expression : Optional [str ] = None ,
387396 time_zone_id : Optional [str ] = None ,
@@ -394,12 +403,12 @@ async def create_index_async(
394403
395404 Args:
396405 name (str): The name of the index to create.
397- source (dict ): Source configuration dictionary :
398- - For buckets: type="bucket", bucket_name, folder_path, directory_path="/" (optional), file_type (optional)
399- - For Google Drive: type="google_drive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
400- - For Dropbox: type="dropbox", connection_name, connection_id, directory_path, folder_path, file_type (optional)
401- - For OneDrive: type="onedrive", connection_name, connection_id, leaf_folder_id, directory_path, folder_path, file_type (optional)
402- - For Confluence: type="confluence", connection_name, connection_id, space_id, directory_path, folder_path, file_type (optional)
406+ source (SourceConfig ): Source configuration using one of :
407+ - BucketSourceConfig: For storage buckets
408+ - GoogleDriveSourceConfig: For Google Drive
409+ - DropboxSourceConfig: For Dropbox
410+ - OneDriveSourceConfig: For OneDrive
411+ - ConfluenceSourceConfig: For Confluence
403412 description (Optional[str]): Description of the index.
404413 cron_expression (Optional[str]): Cron expression for scheduled indexing (e.g., "0 0 18 ? * 2" for Tuesdays at 6 PM).
405414 time_zone_id (Optional[str]): Valid Windows Timezone ID for the cron expression (e.g., "UTC", "Pacific Standard Time", "GTB Standard Time").
@@ -428,7 +437,7 @@ async def create_index_async(
428437 response = await self .request_async (
429438 spec .method ,
430439 spec .endpoint ,
431- content = spec .content ,
440+ json = spec .json ,
432441 headers = spec .headers ,
433442 )
434443
@@ -697,7 +706,7 @@ def _create_spec(
697706 self ,
698707 name : str ,
699708 description : Optional [str ],
700- source : Dict [ str , Any ] ,
709+ source : SourceConfig ,
701710 advanced_ingestion : bool ,
702711 preprocessing_request : str ,
703712 cron_expression : Optional [str ] = None ,
@@ -710,7 +719,7 @@ def _create_spec(
710719 Args:
711720 name: Index name
712721 description: Index description
713- source: Source configuration dictionary
722+ source: Source configuration (typed model)
714723 cron_expression: Optional cron expression for scheduled indexing
715724 time_zone_id: Optional timezone for cron expression
716725 advanced_ingestion: Whether to enable advanced ingestion with preprocessing
@@ -721,175 +730,103 @@ def _create_spec(
721730 Returns:
722731 RequestSpec for the create index request
723732 """
724- source_type = source .get ("type" , "" ).lower ()
725-
726733 folder_key = self ._resolve_folder_key (folder_key , folder_path )
727- file_type = source .get ("file_type" )
728- file_name_glob = f"**/*.{ file_type } " if file_type else "**/*"
729734
730- data_source = self ._build_data_source (source_type , source , file_name_glob )
735+ data_source_dict = self ._build_data_source (source )
731736
732737 if cron_expression :
733- data_source ["indexer" ] = {
738+ data_source_dict ["indexer" ] = {
734739 "cronExpression" : cron_expression ,
735740 "timeZoneId" : time_zone_id or "UTC" ,
736741 }
737742
738- payload = {
739- " name" : name ,
740- " description" : description or "" ,
741- "dataSource" : data_source ,
742- }
743-
744- if advanced_ingestion and preprocessing_request :
745- payload [ "preProcessing" ] = {
746- "@odata.type" : preprocessing_request ,
747- }
743+ payload = CreateIndexPayload (
744+ name = name ,
745+ description = description or "" ,
746+ data_source = data_source_dict ,
747+ pre_processing = (
748+ PreProcessing ( ** { "@odata.type" : preprocessing_request })
749+ if advanced_ingestion and preprocessing_request
750+ else None
751+ ) ,
752+ )
748753
749754 return RequestSpec (
750755 method = "POST" ,
751756 endpoint = Endpoint ("/ecs_/v2/indexes/create" ),
752- content = json . dumps ( payload ),
757+ json = payload . model_dump ( by_alias = True , exclude_none = True ),
753758 headers = {
754759 ** header_folder (folder_key , None ),
755- "Content-Type" : "application/json" ,
756760 },
757761 )
758762
def _build_data_source(self, source: SourceConfig) -> Dict[str, Any]:
    """Translate a typed source configuration into an API data-source dict.

    The file-name glob is ``**/*.<file_type>`` when ``source.file_type`` is
    truthy and ``**/*`` otherwise. The matching data-source model is then
    built from the fields of ``source`` and serialized.

    Args:
        source: A source configuration model. Instances of
            BucketSourceConfig, GoogleDriveSourceConfig, DropboxSourceConfig,
            OneDriveSourceConfig and ConfluenceSourceConfig are handled.

    Returns:
        The data-source model dumped with ``by_alias=True`` and
        ``exclude_none=True``.

    Raises:
        ValueError: If ``source`` is not an instance of any handled
            configuration class.
    """

    def _as_payload(model: Any) -> Dict[str, Any]:
        # Single place that fixes the serialization options for every source kind.
        return model.model_dump(by_alias=True, exclude_none=True)

    glob_pattern = "**/*"
    if source.file_type:
        glob_pattern = f"**/*.{source.file_type}"

    # The checks run in the same order as before; the first match wins.
    if isinstance(source, BucketSourceConfig):
        return _as_payload(
            BucketDataSource(
                folder=source.folder_path,
                bucketName=source.bucket_name,
                fileNameGlob=glob_pattern,
                directoryPath=source.directory_path,
            )
        )

    if isinstance(source, GoogleDriveSourceConfig):
        return _as_payload(
            GoogleDriveDataSource(
                folder=source.folder_path,
                connectionId=source.connection_id,
                connectionName=source.connection_name,
                leafFolderId=source.leaf_folder_id,
                directoryPath=source.directory_path,
                fileNameGlob=glob_pattern,
            )
        )

    if isinstance(source, DropboxSourceConfig):
        return _as_payload(
            DropboxDataSource(
                folder=source.folder_path,
                connectionId=source.connection_id,
                connectionName=source.connection_name,
                directoryPath=source.directory_path,
                fileNameGlob=glob_pattern,
            )
        )

    if isinstance(source, OneDriveSourceConfig):
        return _as_payload(
            OneDriveDataSource(
                folder=source.folder_path,
                connectionId=source.connection_id,
                connectionName=source.connection_name,
                leafFolderId=source.leaf_folder_id,
                directoryPath=source.directory_path,
                fileNameGlob=glob_pattern,
            )
        )

    if isinstance(source, ConfluenceSourceConfig):
        return _as_payload(
            ConfluenceDataSource(
                folder=source.folder_path,
                connectionId=source.connection_id,
                connectionName=source.connection_name,
                directoryPath=source.directory_path,
                fileNameGlob=glob_pattern,
                spaceId=source.space_id,
            )
        )

    raise ValueError(
        f"Unsupported source configuration type: {type(source).__name__}"
    )
893830
894831 def _retrieve_by_id_spec (
895832 self ,
0 commit comments