Skip to content

Add search SP list tool #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: bug/fix-tool-path
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"label": "Sharepoint Tools",
"description": "Collection of agent tools for SharePoint Online",
"author": "Dataiku (Alex Bourret)",
"icon": "dku-icon-microsoft-sharepoint-48",
"icon": "dku-icon-microsoft-sharepoint",
"tags": [],
"url": "",
"licenseInfo": "Apache Software License"
Expand Down
20 changes: 20 additions & 0 deletions python-agent-tools/read-list/tool.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"id": "search-sharepoint-list",
"meta": {
"icon": "dku-icon-microsoft-sharepoint",
"label": "Search a SharePoint Online list"
},

"params" : [
{
"name": "sharepoint_connection",
"label": "SharePoint connection",
"type": "CONNECTION"
},
{
"name": "sharepoint_url",
"label": "SharePoint URL",
"type": "STRING"
}
]
}
104 changes: 104 additions & 0 deletions python-agent-tools/read-list/tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import dataiku
from dataiku.llm.agent_tools import BaseAgentTool
from safe_logger import SafeLogger
from office365_client import Office365Session
from dss_constants import DSSConstants


# Module-level logger shared by the methods of the agent tool defined in this file.
logger = SafeLogger("sharepoint-tool plugin")


class SearchSharePointListTool(BaseAgentTool):
    """Agent tool that searches the rows of a SharePoint Online list.

    The tool is configured with a DSS connection (used to obtain an OAuth2
    access token) and the URL of a SharePoint list. The list columns that
    carry a description are exposed to the agent as searchable properties;
    a search is an exact-match ("eq") filter on one or more of them.
    """

    def set_config(self, config, plugin_config):
        """Initialise the tool from its DSS configuration.

        Reads "sharepoint_connection" and "sharepoint_url" from ``config``,
        opens an Office 365 session with the connection's access token and
        resolves the target list. When the site or the list cannot be
        resolved, no exception is raised: the problem is stored in
        ``self.initialization_error`` and reported on the next ``invoke``.

        :param config: tool-level settings (see tool.json).
        :param plugin_config: plugin-level settings; not used here.
        """
        logger.info('SharePoint Online plugin search tool v{}'.format(DSSConstants.PLUGIN_VERSION))

        self.config = config
        # Safe defaults so that get_descriptor() and invoke() can always read
        # these attributes, even when initialisation stops early below.
        self.properties = {}
        self.output_schema = None
        self.initialization_error = None
        self.list = None

        connection_name = config.get("sharepoint_connection")
        client = dataiku.api_client()
        connection = client.get_connection(connection_name)
        connection_info = connection.get_info()
        credentials = connection_info.get_oauth2_credential()
        sharepoint_access_token = credentials.get("accessToken")
        sharepoint_url = config.get("sharepoint_url")

        self.session = Office365Session(access_token=sharepoint_access_token)
        site_id, self.list_id = self.session.extract_site_list_from_url(sharepoint_url)
        if not site_id:
            self.initialization_error = "The site in '{}' does not exists or is not accessible. Please check your credentials".format(
                sharepoint_url
            )
            return
        if not self.list_id:
            self.initialization_error = "The list in '{}' does not exists or is not accessible. Please check your credentials".format(
                sharepoint_url
            )
            return
        site = self.session.get_site(site_id)
        self.list = site.get_list(self.list_id)
        self.output_schema, self.properties = self._get_schema_and_properties()

    def _get_schema_and_properties(self):
        """Build the output schema and the input properties from the list columns.

        Only columns that have a description are kept, since the description
        is what tells the agent how to use the column.

        :return: tuple ``(output_schema, properties)`` where ``output_schema``
            is ``{"columns": [...]}`` and ``properties`` maps a column name to
            its JSON-schema-like ``{"type", "description"}`` entry.
        """
        output_columns = []
        properties = {}
        for sharepoint_column in self.list.get_columns():
            column_description = sharepoint_column.get("description")
            if not column_description:
                continue
            column_name = sharepoint_column.get("name")
            properties[column_name] = {
                "type": "string",  # we don't have access to that information
                "description": column_description
            }
            # NOTE(review): the output schema lists the same (described) columns
            # that _filter_fields() returns - confirm this is the intended nesting.
            output_columns.append({
                "type": "string",  # we don't have access to that information
                "name": column_name
            })
        output_schema = {
            "columns": output_columns
        }
        return output_schema, properties

    def get_descriptor(self, tool):
        """Return the tool descriptor advertised to the agent.

        The input schema properties are the column descriptions retrieved
        from SharePoint during ``set_config``.

        :param tool: not used by this implementation.
        :return: dict with the tool "description" and its "inputSchema".
        """
        # we want to modify the description to add the columns descriptions retrieved from sharepoint
        return {
            "description": "This tool can be used to access lists on SharePoint Online. The input to this tool is a dictionary containing the name of the column to search and the term to search in it, e.g. '{'City':'Paris', 'Urgency':'High'}'",
            "inputSchema": {
                "$id": "https://dataiku.com/agents/tools/search/input",
                "title": "Search a SharePoint Online list tool",
                "type": "object",
                "properties": self.properties
            }
        }

    def invoke(self, input, trace):
        """Search the list for rows matching every ``column: value`` pair.

        :param input: invocation payload; the search terms are read from its
            "input" entry, a dict mapping column names to the value to match.
        :param trace: not used by this implementation.
        :return: ``{"output": [row, ...]}`` on success, each row restricted to
            the described columns, or ``{"error": message}`` on failure.
        """
        if self.initialization_error:
            return {"error": self.initialization_error}

        # A present-but-null "input" must behave like a missing one.
        args = input.get("input") or {}
        if not isinstance(args, dict):
            return {"error": "The input of this tool must be a dictionary of column names and search terms"}

        # Column names end up verbatim in the OData filter, so only the columns
        # advertised in the descriptor are accepted.
        unknown_columns = [column for column in args if column not in self.properties]
        if unknown_columns:
            return {
                "error": "Unknown column(s): {}. Searchable columns are: {}".format(
                    ", ".join(str(column) for column in unknown_columns),
                    ", ".join(self.properties)
                )
            }

        filter_tokens = [
            "fields/{} eq '{}'".format(column, self._escape_odata_string(value))
            for column, value in args.items()
        ]
        # An empty filter string means "no filter": every row is returned.
        odata_filter = " and ".join(filter_tokens)

        hits = []
        try:
            for row in self.list.get_next_row(filter=odata_filter):
                fields = row.get("fields", {})
                hits.append(self._filter_fields(fields))
        except Exception as error:
            # Boundary with the agent: log the detail, return a generic message.
            logger.error("Error {}".format(error))
            return {"error": "There was an error while searching SharePoint Online"}

        return {
            "output": hits
        }

    @staticmethod
    def _escape_odata_string(value):
        """Return ``value`` as text safe to embed in a single-quoted OData literal.

        OData escapes a single quote inside a string literal by doubling it.
        """
        return str(value).replace("'", "''")

    def _filter_fields(self, fields):
        """Keep only the entries of ``fields`` whose key is a described column."""
        return {
            field: fields.get(field)
            for field in fields
            if field in self.properties
        }
4 changes: 3 additions & 1 deletion python-lib/office365_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def get_next_item(self, **kwargs):
kwargs["headers"] = kwargs.get("headers", {})
kwargs["headers"].update(DSSConstants.JSON_HEADERS)
kwargs["headers"].update(DSSConstants.GZIP_HEADERS)
if "$filter" in kwargs.get("params", {}):
kwargs["headers"].update({"Prefer": "HonorNonIndexedQueriesWarningMayFailRandomly"})
is_first_get = True
next_page_url = None
while next_page_url or is_first_get:
Expand Down Expand Up @@ -308,7 +310,7 @@ def search_list(self, query):
if len(value) > 0:
hits_containers = value[0].get("hitsContainers", [])
if len(hits_containers) > 0:
hits = hits_containers.get("hits", [])
hits = hits_containers[0].get("hits", [])
return hits
return []

Expand Down
7 changes: 5 additions & 2 deletions python-lib/office365_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ def get_column_url(self):
)
return url

def get_next_row(self):
def get_next_row(self, filter={}):
params = {"expand": "field"}
if filter:
params.update({"$filter": filter})
url = self.get_next_list_row_url()
for row in self.session.get_next_item(
url=url,
params={"expand": "field"},
params=params,
force_no_batch=True
):
yield row
Expand Down