diff --git a/.github/workflows/smokeshow.yml b/.github/workflows/smokeshow.yml
index 4d46fb44b8..d4519e9abc 100644
--- a/.github/workflows/smokeshow.yml
+++ b/.github/workflows/smokeshow.yml
@@ -19,7 +19,7 @@ jobs:
         with:
           python-version: "3.10"
       - run: pip install smokeshow
-      - uses: actions/download-artifact@v7
+      - uses: actions/download-artifact@v6
         with:
           name: coverage-html
           path: backend/htmlcov
diff --git a/.github/workflows/test-backend.yml b/.github/workflows/test-backend.yml
index dfd6c916f9..3a4932f39d 100644
--- a/.github/workflows/test-backend.yml
+++ b/.github/workflows/test-backend.yml
@@ -59,7 +59,7 @@ jobs:
         if: failure()
         run: docker compose logs --no-color db || true
       - name: Store coverage files
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v5
        with:
          name: coverage-html
          path: backend/htmlcov
diff --git a/backend/app/api/controllers/dropbox_controller.py b/backend/app/api/controllers/dropbox_controller.py
index 8fe59a8c06..0ae08dbc5b 100644
--- a/backend/app/api/controllers/dropbox_controller.py
+++ b/backend/app/api/controllers/dropbox_controller.py
@@ -1,4 +1,5 @@
 import uuid
+from datetime import datetime
 from typing import Any
 
 from fastapi.datastructures import UploadFile
@@ -18,6 +19,16 @@ def __init__(self) -> None:
         self.response_class: type[ResponseSchema[Any]] = ResponseSchema
         self.error_class = AppException
 
+    def _serialize_datetime(self, obj: Any) -> Any:
+        """Recursively serialize datetime objects to ISO format strings"""
+        if isinstance(obj, datetime):
+            return obj.isoformat()
+        elif isinstance(obj, dict):
+            return {k: self._serialize_datetime(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [self._serialize_datetime(item) for item in obj]
+        return obj
+
     def _success(
         self,
         data: Any = None,
@@ -40,6 +51,8 @@ def _success(
         elif isinstance(data, SQLModel):
             # Convert SQLModel to dict with proper UUID serialization
             data_payload = data.model_dump(mode="json")
+            # Recursively serialize any remaining datetime objects (e.g., in extra_data)
+            data_payload = self._serialize_datetime(data_payload)
 
         payload = self.response_class(
             success=True,
@@ -94,7 +107,6 @@ async def connect_dropbox_with_tokens(
                 access_token=token_response.access_token,
                 expires_in=token_response.expires_in,
                 refresh_token=token_response.refresh_token,
-                token_type=token_response.token_type,
                 scope=token_response.scope,
                 user_id=user_id,
             )
@@ -133,6 +145,20 @@ async def get_all_files_with_namespaces(
         except Exception as e:
             return self._error(message=e)
 
+    async def get_all_files(
+        self,
+        user_id: uuid.UUID,
+    ) -> JSONResponse:
+        """Get all files as a flat list without namespace organization"""
+        try:
+            files = await self.service.get_all_files_combined(user_id=user_id)
+            return self._success(
+                data={"files": files, "total_files": len(files)},
+                message="Successfully retrieved all files",
+            )
+        except Exception as e:
+            return self._error(message=e)
+
     async def get_all_namespaces(
         self,
         user_id: uuid.UUID,
@@ -182,4 +208,3 @@ async def upload_file(
             )
         except Exception as e:
             return self._error(message=e)
-
diff --git a/backend/app/api/controllers/search_controller.py b/backend/app/api/controllers/search_controller.py
new file mode 100644
index 0000000000..8f5f3b50dd
--- /dev/null
+++ b/backend/app/api/controllers/search_controller.py
@@ -0,0 +1,127 @@
+import uuid
+from typing import Any
+
+from fastapi.responses import JSONResponse
+from sqlmodel import SQLModel
+from starlette import status
+
+from app.core.exceptions import AppException
+from app.schemas.response import ResponseSchema
+from app.services.search_service import SearchService
+
+
+class SearchController:
+    def __init__(self) -> None:
+        self.service = SearchService()
+        self.response_class: type[ResponseSchema[Any]] = ResponseSchema
+        self.error_class = AppException
+
+    def _serialize_datetime(self, obj: Any) -> Any:
+        """Recursively serialize datetime objects to ISO format strings"""
+        if isinstance(obj, (str, int, float, bool, type(None))):
+            return obj
+        elif hasattr(obj, "isoformat"):  # datetime objects
+            return obj.isoformat()
+        elif isinstance(obj, dict):
+            return {k: self._serialize_datetime(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [self._serialize_datetime(item) for item in obj]
+        return obj
+
+    def _success(
+        self,
+        data: Any = None,
+        message: str = "OK",
+        status_code: int = status.HTTP_200_OK,
+    ) -> JSONResponse:
+        msg = message
+        data_payload = data
+
+        if isinstance(data, dict):
+            msg = data.get("message") or message
+            if "user" in data:
+                data_payload = data.get("user")
+            elif "data" in data:
+                data_payload = data.get("data")
+                if isinstance(data_payload, dict) and "message" in data_payload:
+                    data_payload = {
+                        k: v for k, v in data_payload.items() if k != "message"
+                    }
+        elif isinstance(data, SQLModel):
+            # Convert SQLModel to dict with proper UUID serialization
+            data_payload = data.model_dump(mode="json")
+            # Recursively serialize any remaining datetime objects
+            data_payload = self._serialize_datetime(data_payload)
+
+        # Serialize datetime objects in data_payload
+        data_payload = self._serialize_datetime(data_payload)
+
+        payload = self.response_class(
+            success=True,
+            message=msg,
+            data=data_payload,
+            errors=None,
+            meta=None,
+        ).model_dump(mode="json", exclude_none=True)
+
+        return JSONResponse(status_code=status_code, content=payload)
+
+    def _error(
+        self, message: Any = "Error", errors: Any = None, status_code: int | None = None
+    ) -> JSONResponse:
+        code = status_code
+        if isinstance(message, self.error_class):
+            exc = message
+            fallback_status = getattr(exc, "status_code", status.HTTP_400_BAD_REQUEST)
+            if code is None:
+                if isinstance(fallback_status, int):
+                    code = fallback_status
+                else:
+                    code = status.HTTP_400_BAD_REQUEST
+            payload = self.response_class(
+                success=False,
+                message=getattr(exc, "message", str(exc)),
+                errors=getattr(exc, "details", None),
+                data=None,
+            ).model_dump(mode="json", exclude_none=True)
+            return JSONResponse(status_code=int(code), content=payload)
+
+        code = code if code is not None else status.HTTP_400_BAD_REQUEST
+        msg = str(message)
+
+        payload = self.response_class(
+            success=False,
+            message=msg,
+            errors=errors,
+            data=None,
+        ).model_dump(mode="json", exclude_none=True)
+
+        return JSONResponse(status_code=int(code), content=payload)
+
+    async def search_all_providers(
+        self,
+        user_id: uuid.UUID,
+        query: str,
+        search_in_content: bool = True,
+        max_file_size: int = 10 * 1024 * 1024,
+    ) -> JSONResponse:
+        try:
+            if not query or not query.strip():
+                return self._error(
+                    message="Search query cannot be empty",
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                )
+
+            results = await self.service.search_all_providers(
+                user_id=user_id,
+                search_query=query,
+                search_in_content=search_in_content,
+                max_file_size=max_file_size,
+            )
+
+            return self._success(
+                data=results,
+                message=f"Search completed. Found {results.get('total_files', 0)} files matching '{query}'",
+            )
+        except Exception as e:
+            return self._error(message=e)
diff --git a/backend/app/api/main.py b/backend/app/api/main.py
index 6b82f199de..fdc87c4529 100644
--- a/backend/app/api/main.py
+++ b/backend/app/api/main.py
@@ -1,6 +1,6 @@
 from fastapi import APIRouter
 
-from app.api.routes import auth, dropbox, integrations, one_drive, utils, ws
+from app.api.routes import auth, dropbox, integrations, one_drive, search, utils, ws
 
 api_router = APIRouter()
 api_router.include_router(auth.router)
@@ -9,3 +9,4 @@
 api_router.include_router(integrations.router)
 api_router.include_router(one_drive.router)
 api_router.include_router(dropbox.router)
+api_router.include_router(search.router)
diff --git a/backend/app/api/routes/dropbox.py b/backend/app/api/routes/dropbox.py
index 29351fda9c..b485b5cbfd 100644
--- a/backend/app/api/routes/dropbox.py
+++ b/backend/app/api/routes/dropbox.py
@@ -34,6 +34,14 @@ async def connect_dropbox_with_tokens(
     )
 
 
+@router.get("/files")
+async def get_all_files(
+    user_id: uuid.UUID = Depends(get_current_user_id),
+) -> JSONResponse:
+    """Get all files as a flat list without namespace organization"""
+    return await controller.get_all_files(user_id=user_id)
+
+
 @router.get("/files/all")
 async def get_all_files_with_namespaces(
     user_id: uuid.UUID = Depends(get_current_user_id),
diff --git a/backend/app/api/routes/search.py b/backend/app/api/routes/search.py
new file mode 100644
index 0000000000..813f1e6eb1
--- /dev/null
+++ b/backend/app/api/routes/search.py
@@ -0,0 +1,30 @@
+import uuid
+
+from fastapi import APIRouter, Depends, Query
+from fastapi.responses import JSONResponse
+
+from app.api.controllers.search_controller import SearchController
+from app.api.deps import get_current_user_id
+
+router = APIRouter(prefix="/search", tags=["Search"])
+controller = SearchController()
+
+
+@router.get("/files")
+async def search_all_providers(
+    query: str = Query(..., description="Search query string (e.g., 'report')"),
+    search_in_content: bool = Query(
+        True, description="Whether to search inside file contents"
+    ),
+    max_file_size: int = Query(
+        10 * 1024 * 1024,
+        description="Maximum file size, in bytes, for content search (default: 10 MB)",
+    ),
+    user_id: uuid.UUID = Depends(get_current_user_id),
+) -> JSONResponse:
+    return await controller.search_all_providers(
+        user_id=user_id,
+        query=query,
+        search_in_content=search_in_content,
+        max_file_size=max_file_size,
+    )
diff --git a/backend/app/schemas/external_account.py b/backend/app/schemas/external_account.py
index 435e5364b3..5229a7aff9 100644
--- a/backend/app/schemas/external_account.py
+++ b/backend/app/schemas/external_account.py
@@ -28,9 +28,8 @@ class OneDriveTokenResponse(BaseModel):
 
 class DropboxTokenResponse(BaseModel):
     access_token: str = Field(..., description="Dropbox OAuth access token")
-    expires_in: int | None = Field(
-        None, description="Access token expiration time in seconds"
+    expires_in: str | None = Field(
+        None, description="Access token expiration timestamp in ISO 8601 format"
     )
-    token_type: str | None = Field(default="Bearer", description="Token type")
     refresh_token: str | None = Field(None, description="Dropbox OAuth refresh token")
     scope: str | None = Field(None, description="OAuth scopes granted")
diff --git a/backend/app/services/dropbox_service.py b/backend/app/services/dropbox_service.py
index faf6fbe983..33eda176b2 100644
--- a/backend/app/services/dropbox_service.py
+++ b/backend/app/services/dropbox_service.py
@@ -8,6 +8,7 @@
 import httpx
 from dropbox import Dropbox
 from dropbox.exceptions import ApiError, AuthError
+from dropbox.files import FileMetadata
 from sqlmodel import Session, select
 
 from app.core.config import settings
@@ -23,8 +24,7 @@ async def connect_dropbox_with_tokens(
         self,
         access_token: str,
         refresh_token: str | None = None,
-        expires_in: int | None = None,
-        token_type: str | None = None,
+        expires_in: str | None = None,
         scope: str | None = None,
         user_id: uuid.UUID | None = None,
         session: Session | None = None,
@@ -34,7 +34,7 @@
 
         expires_at = None
         if expires_in:
-            expires_at = datetime.utcnow() + timedelta(seconds=expires_in)
+            expires_at = datetime.fromisoformat(expires_in)
 
         user_info = await self._get_dropbox_user_info(access_token)
         provider_account_id = user_info.get("account_id")
@@ -43,8 +43,8 @@
         final_scope = scope or settings.DROPBOX_SCOPE
 
         token_info = {
-            "token_type": token_type,
-            "expires_in": expires_in,
+            "token_type": "Bearer",
+            "expires_at": expires_at.isoformat() if expires_at else None,
             "refresh_token": refresh_token,
             "scope": final_scope,
         }
@@ -260,7 +260,10 @@ async def get_all_files_with_namespaces(
 
             try:
                 files = await self.get_files_for_namespace(
-                    user_id, namespace_id, session=session
+                    user_id,
+                    namespace_id,
+                    namespace_type=namespace.get("namespace_type"),
+                    session=session,
                 )
                 namespace_data = {
                     "namespace": namespace,
@@ -281,6 +284,41 @@
 
         return result
 
+    async def get_all_files(
+        self,
+        user_id: uuid.UUID,
+        session: Session | None = None,
+    ) -> list[dict[str, Any]]:
+        """Get all files as a flat list without namespace organization"""
+        namespaces = await self.get_all_namespaces(user_id, session=session)
+        all_files: list[dict[str, Any]] = []
+
+        for namespace in namespaces:
+            namespace_id = namespace.get("namespace_id")
+            if not namespace_id:
+                continue
+
+            try:
+                files = await self.get_files_for_namespace(
+                    user_id,
+                    namespace_id,
+                    namespace_type=namespace.get("namespace_type"),
+                    session=session,
+                )
+                # Add namespace info to each file
+                for file in files:
+                    file_with_namespace = file.copy()
+                    file_with_namespace["namespace"] = {
+                        "namespace_id": namespace.get("namespace_id"),
+                        "name": namespace.get("name"),
+                        "namespace_type": namespace.get("namespace_type"),
+                    }
+                    all_files.append(file_with_namespace)
+            except Exception as e:
+                logger.error(f"Failed to get files for namespace {namespace_id}: {e}")
+
+        return all_files
+
     async def get_all_namespaces(
         self,
         user_id: uuid.UUID,
@@ -351,90 +389,93 @@
     async def get_files_for_namespace(
         self,
         user_id: uuid.UUID,
         namespace_id: str,
+        namespace_type: str | None = None,
         session: Session | None = None,
     ) -> list[dict[str, Any]]:
-        """Get all files for a specific namespace (recursively)"""
         account = await self.get_dropbox_account(user_id, session=session)
         if not account:
             raise ValueError("Dropbox account not connected")
 
         access_token = await self._ensure_valid_token(account, session=session)
 
-        all_files = []
-        dbx = Dropbox(access_token)
+        # If namespace_type is not provided, determine it by checking if it's the personal account
+        if namespace_type is None:
+            try:
+                dbx_temp = Dropbox(access_token)
+                account_obj = await asyncio.to_thread(
+                    dbx_temp.users_get_current_account
+                )
+                if namespace_id == account_obj.account_id:
+                    namespace_type = "personal"
+                else:
+                    namespace_type = "team"
+            except Exception as e:
+                logger.debug(
+                    f"Could not determine namespace type, defaulting to team: {e}"
+                )
namespace_type = "team" - def convert_metadata_to_dict(metadata: Any) -> dict[str, Any]: - """Convert Dropbox metadata object to dictionary""" - if hasattr(metadata, "__dict__"): - result = {} - for key, value in metadata.__dict__.items(): - if key.startswith("_"): - continue - if hasattr(value, "__dict__"): - result[key] = convert_metadata_to_dict(value) - elif hasattr(value, "get_tag"): - result[key] = value.get_tag() - else: - result[key] = value - return result - return {} + # For personal namespace, use regular Dropbox client (no namespace header) + # For team namespaces, use namespace header + if namespace_type == "personal": + dbx = Dropbox(access_token) + else: + dbx = self._get_dbx_with_namespace(access_token, namespace_id) - async def get_files_recursive(path: str = "") -> None: - """Recursively get all files from a folder""" - try: - # List folder using SDK - result = await asyncio.to_thread(dbx.files_list_folder, path or "") - - # Process entries - for entry in result.entries: - entry_dict = convert_metadata_to_dict(entry) - # Add .tag for compatibility - if hasattr(entry, "get_tag"): - entry_dict[".tag"] = entry.get_tag() - elif isinstance(entry, dropbox.files.FolderMetadata): - entry_dict[".tag"] = "folder" - elif isinstance(entry, dropbox.files.FileMetadata): - entry_dict[".tag"] = "file" - - all_files.append(entry_dict) - - # If it's a folder, recursively get its contents - if isinstance(entry, dropbox.files.FolderMetadata): - entry_path = entry.path_lower or entry.path_display - if entry_path: - await get_files_recursive(path=entry_path) - - # Handle pagination - while result.has_more: - cursor = result.cursor - result = await asyncio.to_thread( - dbx.files_list_folder_continue, cursor - ) + all_files: list[dict[str, Any]] = [] - for entry in result.entries: - entry_dict = convert_metadata_to_dict(entry) - if hasattr(entry, "get_tag"): - entry_dict[".tag"] = entry.get_tag() - elif isinstance(entry, dropbox.files.FolderMetadata): - entry_dict[".tag"] = "folder" - elif isinstance(entry, dropbox.files.FileMetadata): - entry_dict[".tag"] = "file" - - all_files.append(entry_dict) - - if isinstance(entry, dropbox.files.FolderMetadata): - entry_path = entry.path_lower or entry.path_display - if entry_path: - await get_files_recursive(path=entry_path) - except ApiError as e: - logger.error(f"Failed to get files from {path}: {e}") - except Exception as e: - logger.error(f"Error fetching files from {path}: {e}") + try: + result = await asyncio.to_thread( + dbx.files_list_folder, + path="", + recursive=True, + include_media_info=True, + include_deleted=False, + ) - # Start recursive file fetching from root - await get_files_recursive() + def process_entries(entries): + for entry in entries: + # Check if entry is a file or folder using isinstance + is_file = isinstance(entry, FileMetadata) + + data = { + "id": entry.id, + "name": entry.name, + "path_lower": entry.path_lower, + "path_display": entry.path_display, + ".tag": "file" if is_file else "folder", + } + + if is_file: + data.update( + { + "size": entry.size, + "rev": entry.rev, + "content_hash": entry.content_hash, + "client_modified": entry.client_modified.isoformat() + if entry.client_modified + else None, + "server_modified": entry.server_modified.isoformat() + if entry.server_modified + else None, + } + ) + + all_files.append(data) + + process_entries(result.entries) + + while result.has_more: + result = await asyncio.to_thread( + dbx.files_list_folder_continue, result.cursor + ) + process_entries(result.entries) - return 
+            return all_files
+
+        except ApiError as e:
+            logger.error(f"Dropbox list error: {e}")
+            return []
 
     async def get_dropbox_account(
         self,
@@ -515,3 +556,251 @@ async def upload_file_to_dropbox(
         except Exception as e:
             logger.error(f"Unexpected error uploading file to Dropbox: {e}")
             raise ValueError(f"Failed to upload file: {str(e)}")
+
+    def _get_dbx_with_namespace(self, access_token: str, namespace_id: str) -> Dropbox:
+        return Dropbox(
+            access_token,
+            headers={"Dropbox-API-Path-Root": f'{{"namespace_id": "{namespace_id}"}}'},
+        )
+
+    async def _list_files_in_namespace(self, dbx: Dropbox) -> list[dict[str, Any]]:
+        """Helper method to list all files in a namespace using a Dropbox client"""
+        all_files: list[dict[str, Any]] = []
+
+        try:
+            result = await asyncio.to_thread(
+                dbx.files_list_folder,
+                path="",
+                recursive=True,
+                include_media_info=True,
+                include_deleted=False,
+            )
+
+            def process_entries(entries):
+                for entry in entries:
+                    # Check if entry is a file or folder using isinstance
+                    is_file = isinstance(entry, FileMetadata)
+
+                    data = {
+                        "id": entry.id,
+                        "name": entry.name,
+                        "path_lower": entry.path_lower,
+                        "path_display": entry.path_display,
+                        ".tag": "file" if is_file else "folder",
+                    }
+
+                    if is_file:
+                        data.update(
+                            {
+                                "size": entry.size,
+                                "rev": entry.rev,
+                                "content_hash": entry.content_hash,
+                                "client_modified": entry.client_modified.isoformat()
+                                if entry.client_modified
+                                else None,
+                                "server_modified": entry.server_modified.isoformat()
+                                if entry.server_modified
+                                else None,
+                            }
+                        )
+
+                    all_files.append(data)
+
+            process_entries(result.entries)
+
+            while result.has_more:
+                result = await asyncio.to_thread(
+                    dbx.files_list_folder_continue, result.cursor
+                )
+                process_entries(result.entries)
+
+            return all_files
+
+        except ApiError as e:
+            logger.error(f"Dropbox list error: {e}")
+            return []
+
+    async def get_all_files_combined(self, user_id: uuid.UUID) -> list[dict[str, Any]]:
+        """
+        Get all files: personal + any team/shared namespaces
+        """
+        account = await self.get_dropbox_account(user_id)
+        if not account:
+            raise ValueError("Dropbox account not connected")
+
+        access_token = await self._ensure_valid_token(account)
+        dbx = Dropbox(access_token)
+        all_files: list[dict[str, Any]] = []
+
+        # 1️⃣ Personal files
+        personal_files = await self._list_files_in_namespace(dbx)
+        for f in personal_files:
+            f["namespace_type"] = "personal"
+        all_files.extend(personal_files)
+
+        # 2️⃣ Team/other namespaces (requires a team-linked client; fails gracefully otherwise)
+        try:
+            team_namespaces = await asyncio.to_thread(dbx.team_namespaces_list)
+            for ns in team_namespaces.namespaces:
+                dbx_ns = Dropbox(
+                    access_token,
+                    headers={
+                        "Dropbox-API-Path-Root": f'{{"namespace_id": "{ns.namespace_id}"}}'
+                    },
+                )
+                namespace_files = await self._list_files_in_namespace(dbx_ns)
+                for f in namespace_files:
+                    f["namespace_type"] = "team"
+                    f["namespace_id"] = ns.namespace_id
+                    f["namespace_name"] = ns.name
+                all_files.extend(namespace_files)
+        except Exception as e:
+            logger.debug(f"No team/shared namespaces or unable to access: {e}")
+
+        return all_files
+
+    async def search_files(
+        self,
+        user_id: uuid.UUID,
+        query: str,
+        search_in_content: bool = True,
+        session: Session | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Search files in Dropbox using native search API (files/search_v2).
+        This searches both filename and content efficiently using Dropbox's indexed search.
+ """ + account = await self.get_dropbox_account(user_id, session=session) + if not account: + raise ValueError("Dropbox account not connected") + + access_token = await self._ensure_valid_token(account, session=session) + dbx = Dropbox(access_token) + + all_results: list[dict[str, Any]] = [] + + try: + # Use Dropbox search_v2 API which searches both filename and content + from dropbox.files import SearchV2Arg + + # Search in personal namespace + search_arg = SearchV2Arg(query=query) + result = await asyncio.to_thread(dbx.files_search_v2, search_arg) + + def process_matches(matches): + for match in matches: + if hasattr(match, "metadata") and hasattr(match.metadata, "metadata"): + entry = match.metadata.metadata + is_file = isinstance(entry, FileMetadata) + + data = { + "id": entry.id, + "name": entry.name, + "path_lower": entry.path_lower, + "path_display": entry.path_display, + ".tag": "file" if is_file else "folder", + "namespace_type": "personal", + } + + if is_file: + data.update( + { + "size": entry.size, + "rev": entry.rev, + "content_hash": entry.content_hash, + "client_modified": entry.client_modified.isoformat() + if entry.client_modified + else None, + "server_modified": entry.server_modified.isoformat() + if entry.server_modified + else None, + } + ) + + all_results.append(data) + + if hasattr(result, "matches"): + process_matches(result.matches) + + # Handle pagination + while hasattr(result, "has_more") and result.has_more: + if hasattr(result, "cursor"): + search_arg = SearchV2Arg(query=query, cursor=result.cursor) + result = await asyncio.to_thread(dbx.files_search_v2, search_arg) + if hasattr(result, "matches"): + process_matches(result.matches) + else: + break + + # Also search in team namespaces if available + try: + team_namespaces = await asyncio.to_thread(dbx.team_namespaces_list) + for ns in team_namespaces.namespaces: + dbx_ns = self._get_dbx_with_namespace(access_token, ns.namespace_id) + search_arg = SearchV2Arg(query=query) + ns_result = await asyncio.to_thread( + dbx_ns.files_search_v2, search_arg + ) + + def process_ns_matches(matches): + for match in matches: + if hasattr(match, "metadata") and hasattr( + match.metadata, "metadata" + ): + entry = match.metadata.metadata + is_file = isinstance(entry, FileMetadata) + + data = { + "id": entry.id, + "name": entry.name, + "path_lower": entry.path_lower, + "path_display": entry.path_display, + ".tag": "file" if is_file else "folder", + "namespace_type": "team", + "namespace_id": ns.namespace_id, + "namespace_name": ns.name, + } + + if is_file: + data.update( + { + "size": entry.size, + "rev": entry.rev, + "content_hash": entry.content_hash, + "client_modified": entry.client_modified.isoformat() + if entry.client_modified + else None, + "server_modified": entry.server_modified.isoformat() + if entry.server_modified + else None, + } + ) + + all_results.append(data) + + if hasattr(ns_result, "matches"): + process_ns_matches(ns_result.matches) + + # Handle pagination for namespace search + while hasattr(ns_result, "has_more") and ns_result.has_more: + if hasattr(ns_result, "cursor"): + search_arg = SearchV2Arg( + query=query, cursor=ns_result.cursor + ) + ns_result = await asyncio.to_thread( + dbx_ns.files_search_v2, search_arg + ) + if hasattr(ns_result, "matches"): + process_ns_matches(ns_result.matches) + else: + break + except (ApiError, AttributeError) as e: + logger.debug(f"Team namespaces search not available: {e}") + + except ApiError as e: + logger.error(f"Dropbox search error: {e}") + # Fallback: if search_v2 
+            # Fallback: if the search call fails, return empty results.
+            # Note: files/search_v2 itself is available on all Dropbox plans;
+            # only full-text content search requires a Professional or Business account.
+            return []
+
+        return all_results
diff --git a/backend/app/services/integrations_service.py b/backend/app/services/integrations_service.py
index 9cf6612e46..07f7803c0d 100644
--- a/backend/app/services/integrations_service.py
+++ b/backend/app/services/integrations_service.py
@@ -514,3 +514,67 @@ async def update_google_drive_file(
 
         else:
             raise ValueError("No update parameters provided")
+
+    async def search_google_drive_files(
+        self,
+        user_id: uuid.UUID,
+        query: str,
+        search_in_content: bool = True,
+        session: Session | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Search files in Google Drive using native search API.
+        This searches both filename and content efficiently using Google's indexed search.
+        """
+        account = await self.get_google_drive_account(user_id, session=session)
+        if not account:
+            raise ValueError("Google Drive account not connected")
+
+        access_token = await self._ensure_valid_token(account, session=session)
+        headers = {"Authorization": f"Bearer {access_token}"}
+
+        all_results: list[dict[str, Any]] = []
+
+        # Google Drive search query: search in both name and fullText (content).
+        # The 'fullText contains' operator searches inside file contents for supported file types.
+        # Escape single quotes in the query so they do not terminate the query string.
+        escaped_query = query.replace("'", "\\'")
+        if search_in_content:
+            search_query = f"name contains '{escaped_query}' or fullText contains '{escaped_query}'"
+        else:
+            search_query = f"name contains '{escaped_query}'"
+
+        url = "https://www.googleapis.com/drive/v3/files"
+        params: dict[str, Any] = {
+            "q": search_query,
+            "pageSize": 100,
+            "fields": "nextPageToken, files(id, name, mimeType, size, createdTime, modifiedTime, webViewLink, webContentLink)",
+        }
+
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            page_token = None
+            while True:
+                if page_token:
+                    params["pageToken"] = page_token
+                else:
+                    params.pop("pageToken", None)
+
+                try:
+                    response = await client.get(url, headers=headers, params=params)
+                    if response.status_code != 200:
+                        error_detail = response.text
+                        logger.error(f"Failed to search Google Drive files: {error_detail}")
+                        break
+
+                    result: dict[str, Any] = response.json()
+                    files = result.get("files", [])
+                    all_results.extend(files)
+
+                    page_token = result.get("nextPageToken")
+                    if not page_token:
+                        break
+                except Exception as e:
+                    logger.error(f"Error searching Google Drive: {e}")
+                    break
+
+        return all_results
diff --git a/backend/app/services/one_drive_service.py b/backend/app/services/one_drive_service.py
index db9116313a..3ffa637d6a 100644
--- a/backend/app/services/one_drive_service.py
+++ b/backend/app/services/one_drive_service.py
@@ -369,3 +369,101 @@
             "file_name": file_name,
             "file_id": result.get("id"),
         }
+
+    async def search_files(
+        self,
+        user_id: uuid.UUID,
+        query: str,
+        search_in_content: bool = True,
+        session: Session | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Search files in OneDrive using native Microsoft Graph search API.
+        This searches both filename and content efficiently using Microsoft's indexed search.
+ """ + account = await self.get_one_drive_account(user_id, session=session) + if not account: + raise ValueError("OneDrive account not connected") + + access_token = await self._ensure_valid_token(account, session=session) + headers = {"Authorization": f"Bearer {access_token}"} + + all_results: list[dict[str, Any]] = [] + + async with httpx.AsyncClient(timeout=30.0) as client: + # Search in personal OneDrive + # Microsoft Graph search API searches both filename and content + # URL encode the query parameter + encoded_query = quote(query, safe="") + search_url = f"https://graph.microsoft.com/v1.0/me/drive/root/search(q='{encoded_query}')" + + try: + response = await client.get(search_url, headers=headers) + if response.status_code == 200: + data = response.json() + items = data.get("value", []) + for item in items: + item["tenant"] = {"driveType": "personal", "name": "Personal OneDrive"} + all_results.append(item) + + # Handle pagination + next_link = data.get("@odata.nextLink") + while next_link: + response = await client.get(next_link, headers=headers) + if response.status_code == 200: + data = response.json() + items = data.get("value", []) + for item in items: + item["tenant"] = {"driveType": "personal", "name": "Personal OneDrive"} + all_results.append(item) + next_link = data.get("@odata.nextLink") + else: + break + except Exception as e: + logger.error(f"Error searching personal OneDrive: {e}") + + # Also search in SharePoint sites/tenants + try: + tenants = await self.get_all_tenants(user_id, session=session) + for tenant in tenants: + site_id = tenant.get("id") + drive_type = tenant.get("driveType", "sharepoint") + + if drive_type == "personal": + continue # Already searched above + + if not site_id: + continue + + try: + # Search in SharePoint site drive + # URL encode the query parameter + encoded_query = quote(query, safe="") + site_search_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root/search(q='{encoded_query}')" + response = await client.get(site_search_url, headers=headers) + if response.status_code == 200: + data = response.json() + items = data.get("value", []) + for item in items: + item["tenant"] = tenant + all_results.append(item) + + # Handle pagination + next_link = data.get("@odata.nextLink") + while next_link: + response = await client.get(next_link, headers=headers) + if response.status_code == 200: + data = response.json() + items = data.get("value", []) + for item in items: + item["tenant"] = tenant + all_results.append(item) + next_link = data.get("@odata.nextLink") + else: + break + except Exception as e: + logger.debug(f"Error searching tenant {site_id}: {e}") + except Exception as e: + logger.debug(f"Error getting tenants for search: {e}") + + return all_results diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py new file mode 100644 index 0000000000..e015d1f5ed --- /dev/null +++ b/backend/app/services/search_service.py @@ -0,0 +1,186 @@ +import asyncio +import logging +import uuid +from typing import Any + +from sqlmodel import Session + +from app.services.dropbox_service import DropboxService +from app.services.integrations_service import IntegrationService +from app.services.one_drive_service import OneDriveService + +logger = logging.getLogger(__name__) + + +class SearchService: + def __init__(self) -> None: + self.dropbox_service = DropboxService() + self.one_drive_service = OneDriveService() + self.google_drive_service = IntegrationService() + + async def search_all_providers( + self, + 
+        user_id: uuid.UUID,
+        search_query: str,
+        search_in_content: bool = True,
+        max_file_size: int = 10 * 1024 * 1024,  # 10MB default
+        session: Session | None = None,
+    ) -> dict[str, Any]:
+        if not search_query or not search_query.strip():
+            return {
+                "query": search_query,
+                "results": {
+                    "dropbox": {"files": [], "total": 0, "error": None},
+                    "one_drive": {"files": [], "total": 0, "error": None},
+                    "google_drive": {"files": [], "total": 0, "error": None},
+                },
+                "total_files": 0,
+            }
+
+        search_query_lower = search_query.lower().strip()
+
+        # Search all providers in parallel
+        dropbox_task = self._search_dropbox(
+            user_id, search_query_lower, search_in_content, max_file_size, session
+        )
+        one_drive_task = self._search_one_drive(
+            user_id, search_query_lower, search_in_content, max_file_size, session
+        )
+        google_drive_task = self._search_google_drive(
+            user_id, search_query_lower, search_in_content, max_file_size, session
+        )
+
+        dropbox_results, one_drive_results, google_drive_results = await asyncio.gather(
+            dropbox_task, one_drive_task, google_drive_task, return_exceptions=True
+        )
+
+        # Handle exceptions
+        if isinstance(dropbox_results, Exception):
+            logger.error(f"Dropbox search error: {dropbox_results}")
+            dropbox_results = {"files": [], "total": 0, "error": str(dropbox_results)}
+        if isinstance(one_drive_results, Exception):
+            logger.error(f"OneDrive search error: {one_drive_results}")
+            one_drive_results = {
+                "files": [],
+                "total": 0,
+                "error": str(one_drive_results),
+            }
+        if isinstance(google_drive_results, Exception):
+            logger.error(f"Google Drive search error: {google_drive_results}")
+            google_drive_results = {
+                "files": [],
+                "total": 0,
+                "error": str(google_drive_results),
+            }
+
+        total_files = (
+            dropbox_results.get("total", 0)
+            + one_drive_results.get("total", 0)
+            + google_drive_results.get("total", 0)
+        )
+
+        return {
+            "query": search_query,
+            "results": {
+                "dropbox": dropbox_results,
+                "one_drive": one_drive_results,
+                "google_drive": google_drive_results,
+            },
+            "total_files": total_files,
+        }
+
+    async def _search_dropbox(
+        self,
+        user_id: uuid.UUID,
+        search_query: str,
+        search_in_content: bool,
+        max_file_size: int,
+        session: Session | None = None,
+    ) -> dict[str, Any]:
+        try:
+            # Use native Dropbox search API (searches both filename and content)
+            matching_files = await self.dropbox_service.search_files(
+                user_id=user_id,
+                query=search_query,
+                search_in_content=search_in_content,
+                session=session,
+            )
+
+            # Add provider and match_type to each result
+            for file in matching_files:
+                file["provider"] = "dropbox"
+                # Dropbox search API searches both filename and content, so we mark as "both"
+                file["match_type"] = "both"
+
+            return {
+                "files": matching_files,
+                "total": len(matching_files),
+                "error": None,
+            }
+        except Exception as e:
+            logger.error(f"Error searching Dropbox: {e}")
+            return {"files": [], "total": 0, "error": str(e)}
+
+    async def _search_one_drive(
+        self,
+        user_id: uuid.UUID,
+        search_query: str,
+        search_in_content: bool,
+        max_file_size: int,
+        session: Session | None = None,
+    ) -> dict[str, Any]:
+        try:
+            # Use native Microsoft Graph search API (searches both filename and content)
+            matching_files = await self.one_drive_service.search_files(
+                user_id=user_id,
+                query=search_query,
+                search_in_content=search_in_content,
+                session=session,
+            )
+
+            # Add provider and match_type to each result
+            for file in matching_files:
+                file["provider"] = "one_drive"
+                # Microsoft Graph search API searches both filename and content, so we mark as "both"
we mark as "both" + file["match_type"] = "both" + + return { + "files": matching_files, + "total": len(matching_files), + "error": None, + } + except Exception as e: + logger.error(f"Error searching OneDrive: {e}") + return {"files": [], "total": 0, "error": str(e)} + + async def _search_google_drive( + self, + user_id: uuid.UUID, + search_query: str, + search_in_content: bool, + max_file_size: int, + session: Session | None = None, + ) -> dict[str, Any]: + try: + # Use native Google Drive search API (searches both filename and content) + matching_files = await self.google_drive_service.search_google_drive_files( + user_id=user_id, + query=search_query, + search_in_content=search_in_content, + session=session, + ) + + # Add provider and match_type to each result + for file in matching_files: + file["provider"] = "google_drive" + # Google Drive search API searches both filename and content, so we mark as "both" + file["match_type"] = "both" + + return { + "files": matching_files, + "total": len(matching_files), + "error": None, + } + except Exception as e: + logger.error(f"Error searching Google Drive: {e}") + return {"files": [], "total": 0, "error": str(e)} diff --git a/release-notes.md b/release-notes.md index 13fd3866f7..9cc766d826 100644 --- a/release-notes.md +++ b/release-notes.md @@ -2,16 +2,6 @@ ## Latest Changes -### Internal - -* ⬆ Bump @tanstack/react-router from 1.140.0 to 1.141.2 in /frontend. PR [#2045](https://github.com/fastapi/full-stack-fastapi-template/pull/2045) by [@dependabot[bot]](https://github.com/apps/dependabot). -* ⬆ Bump actions/download-artifact from 6 to 7. PR [#2051](https://github.com/fastapi/full-stack-fastapi-template/pull/2051) by [@dependabot[bot]](https://github.com/apps/dependabot). -* ⬆ Bump actions/upload-artifact from 5 to 6. PR [#2050](https://github.com/fastapi/full-stack-fastapi-template/pull/2050) by [@dependabot[bot]](https://github.com/apps/dependabot). -* ⬆ Bump @types/node from 24.10.1 to 25.0.2 in /frontend. PR [#2048](https://github.com/fastapi/full-stack-fastapi-template/pull/2048) by [@dependabot[bot]](https://github.com/apps/dependabot). -* ⬆ Bump @tailwindcss/vite from 4.1.17 to 4.1.18 in /frontend. PR [#2049](https://github.com/fastapi/full-stack-fastapi-template/pull/2049) by [@dependabot[bot]](https://github.com/apps/dependabot). -* ⬆ Bump vite from 7.2.7 to 7.3.0 in /frontend. PR [#2047](https://github.com/fastapi/full-stack-fastapi-template/pull/2047) by [@dependabot[bot]](https://github.com/apps/dependabot). -* ⬆ Bump react-dom from 19.2.1 to 19.2.3 in /frontend. PR [#2046](https://github.com/fastapi/full-stack-fastapi-template/pull/2046) by [@dependabot[bot]](https://github.com/apps/dependabot). - ## 0.9.0 ### Features