dataiku · alexbourret · May 20, 2025 · May 20, 2025
diff --git a/plugin.json b/plugin.json
@@ -5,7 +5,7 @@
         "label": "Sharepoint Tools",
         "description": "Collection of agent tools for SharePoint Online",
         "author": "Dataiku (Alex Bourret)",
-        "icon": "dku-icon-microsoft-sharepoint-48",
+        "icon": "dku-icon-microsoft-sharepoint",
         "tags": [],
         "url": "",
         "licenseInfo": "Apache Software License"

diff --git a/python-agent-tools/read-list/tool.json b/python-agent-tools/read-list/tool.json
@@ -0,0 +1,20 @@
+{
+    "id": "search-sharepoint-list",
+    "meta": {
+        "icon": "dku-icon-microsoft-sharepoint",
+        "label": "Search a SharePoint Online list"
+    },
+
+    "params" : [
+        {
+            "name": "sharepoint_connection",
+            "label": "SharePoint connection",
+            "type": "CONNECTION"
+        },
+        {
+            "name": "sharepoint_url",
+            "label": "SharePoint URL",
+            "type": "STRING"
+        }
+    ]
+}
diff --git a/python-agent-tools/read-list/tool.py b/python-agent-tools/read-list/tool.py
@@ -0,0 +1,160 @@
+import dataiku
+from dataiku.llm.agent_tools import BaseAgentTool
+from safe_logger import SafeLogger
+from office365_client import Office365Session
+from dss_constants import DSSConstants
+
+
+logger = SafeLogger("sharepoint-tool plugin")
+
+
+class SearchSharePointListTool(BaseAgentTool):
+    """Agent tool searching a SharePoint Online list by exact column values.
+
+    The list is resolved once, in set_config, from the configured SharePoint
+    URL. Only the list columns that have a description in SharePoint are
+    exposed to the agent, as search criteria and in the returned rows.
+    """
+
+    def set_config(self, config, plugin_config):
+        """Open the SharePoint session and resolve the configured list.
+
+        A site or list that cannot be resolved does not raise: the message is
+        stored in self.initialization_error and returned by invoke.
+
+        Args:
+            config: tool settings; "sharepoint_connection" and
+                "sharepoint_url" are read from it.
+            plugin_config: plugin-level settings (unused).
+        """
+        logger.info('SharePoint Online plugin search tool v{}'.format(DSSConstants.PLUGIN_VERSION))
+
+        self.config = config
+        # Defaults first, so these attributes exist on every exit path below
+        self.list = None
+        self.properties = {}
+        self.output_schema = None
+        self.initialization_error = None
+
+        connection_name = config.get("sharepoint_connection")
+        client = dataiku.api_client()
+        connection = client.get_connection(connection_name)
+        connection_info = connection.get_info()
+        credentials = connection_info.get_oauth2_credential()
+        sharepoint_access_token = credentials.get("accessToken")
+        sharepoint_url = config.get("sharepoint_url")
+
+        self.session = Office365Session(access_token=sharepoint_access_token)
+        site_id, self.list_id = self.session.extract_site_list_from_url(sharepoint_url)
+        if not site_id:
+            self.initialization_error = "The site in '{}' does not exists or is not accessible. Please check your credentials".format(
+                sharepoint_url
+            )
+            return
+        if not self.list_id:
+            self.initialization_error = "The list in '{}' does not exists or is not accessible. Please check your credentials".format(
+                sharepoint_url
+            )
+            return
+        site = self.session.get_site(site_id)
+        self.list = site.get_list(self.list_id)
+        self.output_schema, self.properties = self._get_schema_and_properties()
+
+    def _get_schema_and_properties(self):
+        """Build the output schema and the input properties from the list columns.
+
+        Columns without a description are skipped.
+
+        Returns:
+            A tuple (output_schema, properties): output_schema is
+            {"columns": [...]} and properties maps each column name to its
+            input schema entry.
+        """
+        output_columns = []
+        properties = {}
+        for sharepoint_column in self.list.get_columns():
+            column_description = sharepoint_column.get("description")
+            if not column_description:
+                continue
+            column_name = sharepoint_column.get("name")
+            properties[column_name] = {
+                "type": "string",  # we don't have access to that information
+                "description": column_description
+            }
+            output_columns.append({
+                "type": "string",  # we don't have access to that information
+                "name": column_name
+            })
+        output_schema = {
+            "columns": output_columns
+        }
+        return output_schema, properties
+
+    def get_descriptor(self, tool):
+        """Return the description and input schema presented to the agent.
+
+        The schema properties are the described columns collected by
+        set_config; they stay empty when the list could not be resolved.
+        """
+        # we want to modify the description to add the columns descriptions retrieved from sharepoint
+        return {
+            "description": "This tool can be used to access lists on SharePoint Online. The input to this tool is a dictionary containing the name of the column to search and the term to search in it, e.g. '{'City':'Paris', 'Urgency':'High'}'",
+            "inputSchema": {
+                "$id": "https://dataiku.com/agents/tools/search/input",
+                "title": "Search a SharePoint Online list tool",
+                "type": "object",
+                "properties": self.properties
+            }
+        }
+
+    def invoke(self, input, trace):
+        """Search the list for the rows matching every requested column value.
+
+        Args:
+            input: invocation payload; input["input"] maps column names to
+                the exact value to match. Without criteria, no filter is
+                applied.
+            trace: invocation trace (unused).
+
+        Returns:
+            {"output": [row, ...]} holding the described columns of each
+            matching row, or {"error": message} on failure.
+        """
+        if self.initialization_error:
+            return {"error": self.initialization_error}
+
+        # "input" can be missing or null when no criteria is sent
+        args = input.get("input") or {}
+        # Column names go verbatim into the OData filter: accept only the advertised ones
+        unknown_columns = [str(column) for column in args if column not in self.properties]
+        if unknown_columns:
+            return {
+                "error": "Unknown column(s): {}. Searchable columns are: {}".format(
+                    ", ".join(unknown_columns), ", ".join(str(name) for name in self.properties)
+                )
+            }
+        odata_filter = " and ".join(
+            "fields/{} eq '{}'".format(column, self._escape_odata_string(value))
+            for column, value in args.items()
+        )
+        hits = []
+        try:
+            for row in self.list.get_next_row(filter=odata_filter):
+                hits.append(self._filter_fields(row.get("fields", {})))
+        except Exception as error:
+            logger.error("Error {}".format(error))
+            return {"error": "There was an error while searching SharePoint Online"}
+
+        return {
+            "output": hits
+        }
+
+    @staticmethod
+    def _escape_odata_string(value):
+        """Return value as text that can sit between single quotes in an OData filter."""
+        # OData escapes a single quote inside a string literal by doubling it
+        return str(value).replace("'", "''")
+
+    def _filter_fields(self, fields):
+        """Return the entries of a row's fields whose column is in self.properties."""
+        return {name: value for name, value in fields.items() if name in self.properties}
diff --git a/python-lib/office365_client.py b/python-lib/office365_client.py
@@ -82,6 +82,8 @@ def get_next_item(self, **kwargs):
         kwargs["headers"] = kwargs.get("headers", {})
         kwargs["headers"].update(DSSConstants.JSON_HEADERS)
         kwargs["headers"].update(DSSConstants.GZIP_HEADERS)
+        if "$filter" in kwargs.get("params", {}):
+            kwargs["headers"].update({"Prefer": "HonorNonIndexedQueriesWarningMayFailRandomly"})
         is_first_get = True
         next_page_url = None
         while next_page_url or is_first_get:
@@ -308,7 +310,7 @@ def search_list(self, query):
         if len(value) > 0:
             hits_containers = value[0].get("hitsContainers", [])
             if len(hits_containers) > 0:
-                hits = hits_containers.get("hits", [])
+                hits = hits_containers[0].get("hits", [])
                 return hits
         return []
 

diff --git a/python-lib/office365_list.py b/python-lib/office365_list.py
@@ -22,11 +22,20 @@ def get_column_url(self):
         )
         return url
 
-    def get_next_row(self):
+    def get_next_row(self, filter=None):
+        """Yield the list rows fetched through the session, optionally filtered.
+
+        Args:
+            filter: expression sent as the "$filter" query parameter; any
+                falsy value, such as the default, sends no filter.
+        """
+        params = {"expand": "field"}
+        if filter:
+            params["$filter"] = filter
         url = self.get_next_list_row_url()
         for row in self.session.get_next_item(
             url=url,
-            params={"expand": "field"},
+            params=params,
             force_no_batch=True
         ):
             yield row