diff --git a/.env b/.env deleted file mode 100644 index 2b6c0cce74..0000000000 --- a/.env +++ /dev/null @@ -1,44 +0,0 @@ - -DOMAIN=localhost -ENVIRONMENT=local - -PROJECT_NAME="Full Stack FastAPI Project" -STACK_NAME=full-stack-fastapi-project - -# Backend -BACKEND_CORS_ORIGINS="http://localhost,http://localhost:5173,https://localhost,https://localhost:5173,http://localhost.tiangolo.com" -SECRET_KEY=changethis -FIRST_SUPERUSER=admin@example.com -FIRST_SUPERUSER_PASSWORD=changethis - -# Emails -SMTP_HOST= -SMTP_USER= -SMTP_PASSWORD= -EMAILS_FROM_EMAIL=info@example.com -SMTP_TLS=True -SMTP_SSL=False -SMTP_PORT=587 - -# Postgres -POSTGRES_SERVER=localhost -POSTGRES_PORT=5432 -POSTGRES_DB=app -POSTGRES_USER=postgres -POSTGRES_PASSWORD=changethis - -SENTRY_DSN= - -# Configure these with your own Docker registry images -DOCKER_IMAGE_BACKEND=backend -#WebEngage -WEBENGAGE_API_KEY=e57841de-1867-4a13-8535-d14c2288e17d -WEBENGAGE_API_URL=https://api.webengage.com/v2/accounts/ -WEBENGAGE_LICENSE_CODE=11b5648a7 -WEBENGAGE_CAMPAIGN_REGISTER_ID=~2o21rqq -WEBENGAGE_CAMPAIGN_FORGOT_PASSWORD_ID=13co4i3 -INITIAL_ADMIN_EMAIL=admin@example.com -INITIAL_ADMIN_PASSWORD=Test@12345 - - -REDIS_URL=redis://redis:6379/0 diff --git a/backend/app/api/controllers/search_controller.py b/backend/app/api/controllers/search_controller.py index 2ecfd85c95..6712a93488 100644 --- a/backend/app/api/controllers/search_controller.py +++ b/backend/app/api/controllers/search_controller.py @@ -102,8 +102,6 @@ async def search_all_providers( self, user_id: uuid.UUID, query: str, - search_in_content: bool = True, - max_file_size: int = 10 * 1024 * 1024, ) -> JSONResponse: try: if not query or not query.strip(): @@ -115,8 +113,6 @@ async def search_all_providers( results = await self.service.search_all_providers( user_id=user_id, search_query=query, - search_in_content=search_in_content, - max_file_size=max_file_size, ) return self._success( diff --git a/backend/app/api/routes/search.py b/backend/app/api/routes/search.py index 813f1e6eb1..acee8babe7 100644 --- a/backend/app/api/routes/search.py +++ b/backend/app/api/routes/search.py @@ -10,21 +10,12 @@ controller = SearchController() -@router.get("/files") +@router.get("/search-files") async def search_all_providers( query: str = Query(..., description="Search query string (e.g., 'asad')"), - search_in_content: bool = Query( - True, description="Whether to search inside file contents" - ), - max_file_size: int = Query( - 10 * 1024 * 1024, - description="Maximum file size to search content in bytes (default: 10MB)", - ), user_id: uuid.UUID = Depends(get_current_user_id), ) -> JSONResponse: return await controller.search_all_providers( user_id=user_id, query=query, - search_in_content=search_in_content, - max_file_size=max_file_size, ) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 5f9122d8ac..889167efbb 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -164,12 +164,15 @@ def r2_boto3_config(self) -> dict[str, Any]: GOOGLE_DRIVE_RESPONSE_TYPE: str | None = None GOOGLE_DRIVE_PROMPT: str | None = None GOOGLE_DRIVE_ACCESS_TYPE: str | None = None + GOOGLE_DRIVE_URL: str | None = None # Microsoft OAuth2 settings for OneDrive integration MICROSOFT_CLIENT_ID: str | None = None MICROSOFT_CLIENT_SECRET: str | None = None MICROSOFT_REDIRECT_URI: str | None = None MICROSOFT_TENANT_ID: str | None = None + MICROSOFT_URL: str | None = None + MICROSOFT_GRAPH_URL: str | None = None # Dropbox OAuth2 settings for Dropbox integration DROPBOX_CLIENT_SECRET: str | None = None @@ -179,6 +182,7 @@ def r2_boto3_config(self) -> dict[str, Any]: DROPBOX_SCOPE: str | None = None DROPBOX_RESPONSE_TYPE: str | None = None DROPBOX_AUTHORIZATION_URL: str | None = None + DROPBOX_URL: str | None = None @computed_field # type: ignore[prop-decorator] @property diff --git a/backend/app/services/dropbox_service.py b/backend/app/services/dropbox_service.py index 569bbf874c..1f213ff3ee 100644 --- a/backend/app/services/dropbox_service.py +++ b/backend/app/services/dropbox_service.py @@ -8,7 +8,7 @@ import httpx from dropbox import Dropbox from dropbox.exceptions import ApiError, AuthError -from dropbox.files import FileMetadata +from dropbox.files import FileMetadata, FolderMetadata, SearchOptions from sqlmodel import Session, select from app.core.config import settings @@ -39,7 +39,6 @@ async def connect_dropbox_with_tokens( user_info = await self._get_dropbox_user_info(access_token) provider_account_id = user_info.get("account_id") - # Use scope from settings if not provided final_scope = scope or settings.DROPBOX_SCOPE token_info = { @@ -53,11 +52,9 @@ async def connect_dropbox_with_tokens( else: user_info = token_info - # User ID is required if not user_id: raise ValueError("User ID is required") - # Check if account already exists own_session = None if session is None: own_session = Session(get_engine()) @@ -71,7 +68,6 @@ async def connect_dropbox_with_tokens( existing_account = session.exec(stmt).first() if existing_account: - # Update existing account existing_account.access_token = access_token existing_account.refresh_token = refresh_token existing_account.expires_at = expires_at @@ -83,7 +79,6 @@ async def connect_dropbox_with_tokens( session.refresh(existing_account) return existing_account - # Create new account account = ExternalAccount( user_id=user_id, provider=EXTERNAL_ACCOUNT_PROVIDER.DROPBOX, @@ -102,11 +97,9 @@ async def connect_dropbox_with_tokens( own_session.close() async def _get_dropbox_user_info(self, access_token: str) -> dict[str, Any]: - """Get user information from Dropbox API using access token""" try: dbx = Dropbox(access_token) account = await asyncio.to_thread(dbx.users_get_current_account) - # Convert the account object to dict result: dict[str, Any] = { "account_id": account.account_id, "name": { @@ -136,7 +129,6 @@ async def refresh_dropbox_token( account: ExternalAccount, session: Session | None = None, ) -> ExternalAccount: - """Refresh Dropbox access token using refresh token""" if not account.refresh_token: raise ValueError("No refresh token available") @@ -149,9 +141,7 @@ async def refresh_dropbox_token( session = own_session try: - # Use OAuth2 endpoint for token refresh (standard OAuth2, not Dropbox-specific API) - # The Dropbox SDK doesn't provide a refresh method, so we use httpx for OAuth2 token endpoint - token_url = "https://api.dropbox.com/oauth2/token" + token_url = f"{settings.DROPBOX_URL}/oauth2/token" token_data = { "grant_type": "refresh_token", "refresh_token": account.refresh_token, @@ -173,7 +163,6 @@ async def refresh_dropbox_token( expires_at = datetime.utcnow() + timedelta(seconds=expires_in) refresh_token = token_response.get("refresh_token") or account.refresh_token - # Update account with new tokens account.access_token = access_token account.refresh_token = refresh_token account.expires_at = expires_at @@ -198,20 +187,17 @@ async def refresh_dropbox_token( async def _ensure_valid_token( self, account: ExternalAccount, session: Session | None = None ) -> str: - """Ensure the access token is valid, refresh if necessary""" if account.expires_at and account.expires_at > datetime.utcnow(): return account.access_token or "" if not account.refresh_token: raise ValueError("No refresh token available") - # Refresh token if expired refreshed_account = await self.refresh_dropbox_token(account, session=session) if not refreshed_account.access_token: raise ValueError("No access token available after refresh") return refreshed_account.access_token def get_dropbox_authorization_url(self, state: str | None = None) -> str: - """Generate Dropbox OAuth authorization URL""" if not settings.DROPBOX_CLIENT_ID: raise ValueError("Dropbox CLIENT_ID not configured") @@ -222,7 +208,7 @@ def get_dropbox_authorization_url(self, state: str | None = None) -> str: base_url = ( settings.DROPBOX_AUTHORIZATION_URL - or "https://www.dropbox.com/oauth2/authorize" + or f"{settings.DROPBOX_URL}/oauth2/authorize" ) response_type = settings.DROPBOX_RESPONSE_TYPE or "code" scope = settings.DROPBOX_SCOPE or "" @@ -246,7 +232,6 @@ async def get_all_files_with_namespaces( user_id: uuid.UUID, session: Session | None = None, ) -> dict[str, Any]: - """Get all files organized by namespace (personal, team, etc.)""" namespaces = await self.get_all_namespaces(user_id, session=session) result: dict[str, Any] = { "namespaces": [], @@ -289,7 +274,6 @@ async def get_all_files( user_id: uuid.UUID, session: Session | None = None, ) -> list[dict[str, Any]]: - """Get all files as a flat list without namespace organization""" namespaces = await self.get_all_namespaces(user_id, session=session) all_files: list[dict[str, Any]] = [] @@ -305,7 +289,6 @@ async def get_all_files( namespace_type=namespace.get("namespace_type"), session=session, ) - # Add namespace info to each file for file in files: file_with_namespace = file.copy() file_with_namespace["namespace"] = { @@ -324,7 +307,6 @@ async def get_all_namespaces( user_id: uuid.UUID, session: Session | None = None, ) -> list[dict[str, Any]]: - """Get all namespaces from Dropbox API (personal, team, etc.)""" account = await self.get_dropbox_account(user_id, session=session) if not account: raise ValueError("Dropbox account not connected") @@ -334,7 +316,6 @@ async def get_all_namespaces( namespaces = [] dbx = Dropbox(access_token) - # Get user's personal account info try: account_obj = await asyncio.to_thread(dbx.users_get_current_account) namespaces.append( @@ -359,7 +340,6 @@ async def get_all_namespaces( except Exception as e: logger.error(f"Failed to get personal Dropbox account: {e}") - # Get team namespaces if available (requires team scope) try: team_namespaces = await asyncio.to_thread(dbx.team_namespaces_list) for namespace in team_namespaces.namespaces: @@ -398,7 +378,6 @@ async def get_files_for_namespace( access_token = await self._ensure_valid_token(account, session=session) - # If namespace_type is not provided, determine it by checking if it's the personal account if namespace_type is None: try: dbx_temp = Dropbox(access_token) @@ -415,8 +394,6 @@ async def get_files_for_namespace( ) namespace_type = "team" - # For personal namespace, use regular Dropbox client (no namespace header) - # For team namespaces, use namespace header if namespace_type == "personal": dbx = Dropbox(access_token) else: @@ -435,7 +412,6 @@ async def get_files_for_namespace( def process_entries(entries): for entry in entries: - # Check if entry is a file or folder using isinstance is_file = isinstance(entry, FileMetadata) data = { @@ -482,7 +458,6 @@ async def get_dropbox_account( user_id: uuid.UUID, session: Session | None = None, ) -> ExternalAccount | None: - """Get Dropbox account for user""" own_session = None if session is None: own_session = Session(get_engine()) @@ -505,7 +480,6 @@ async def upload_file_to_dropbox( file_name: str, file_content: bytes, ) -> dict[str, Any]: - """Upload a file to Dropbox""" account = await self.get_dropbox_account(user_id) if not account: raise ValueError("Dropbox account not connected") @@ -515,7 +489,6 @@ async def upload_file_to_dropbox( dbx = Dropbox(access_token) try: - # Upload file using SDK path = f"/{file_name}" mode = dropbox.files.WriteMode.add metadata = await asyncio.to_thread( @@ -526,7 +499,6 @@ async def upload_file_to_dropbox( autorename=True, ) - # Convert metadata to dict result: dict[str, Any] = { "id": metadata.id, "name": metadata.name, @@ -564,7 +536,6 @@ def _get_dbx_with_namespace(self, access_token: str, namespace_id: str) -> Dropb ) async def _list_files_in_namespace(self, dbx: Dropbox) -> list[dict[str, Any]]: - """Helper method to list all files in a namespace using a Dropbox client""" all_files: list[dict[str, Any]] = [] try: @@ -578,7 +549,6 @@ async def _list_files_in_namespace(self, dbx: Dropbox) -> list[dict[str, Any]]: def process_entries(entries): for entry in entries: - # Check if entry is a file or folder using isinstance is_file = isinstance(entry, FileMetadata) data = { @@ -621,9 +591,6 @@ def process_entries(entries): return [] async def get_all_files_combined(self, user_id: uuid.UUID) -> list[dict[str, Any]]: - """ - Get all files: personal + any team/shared namespaces - """ account = await self.get_dropbox_account(user_id) if not account: raise ValueError("Dropbox account not connected") @@ -632,13 +599,11 @@ async def get_all_files_combined(self, user_id: uuid.UUID) -> list[dict[str, Any dbx = Dropbox(access_token) all_files: list[dict[str, Any]] = [] - # 1️⃣ Personal files personal_files = await self._list_files_in_namespace(dbx) for f in personal_files: f["namespace_type"] = "personal" all_files.extend(personal_files) - # 2️⃣ Team/other namespaces (requires team scope) try: team_namespaces = await asyncio.to_thread(dbx.team_namespaces_list) for ns in team_namespaces.namespaces: @@ -660,151 +625,79 @@ async def get_all_files_combined(self, user_id: uuid.UUID) -> list[dict[str, Any return all_files async def search_files( - self, - user_id: uuid.UUID, - query: str, - search_in_content: bool = True, - session: Session | None = None, + self, user_id: uuid.UUID, query: str, session: Session | None = None ) -> list[dict[str, Any]]: - """ - Search files in Dropbox using native search API (files/search_v2). - This searches both filename and content efficiently using Dropbox's indexed search. - """ - account = await self.get_dropbox_account(user_id, session=session) - if not account: - raise ValueError("Dropbox account not connected") + try: + account = await self.get_dropbox_account(user_id, session=session) + if not account: + raise ValueError("Dropbox account not connected") - access_token = await self._ensure_valid_token(account, session=session) - dbx = Dropbox(access_token) + access_token = await self._ensure_valid_token(account, session=session) + dbx = Dropbox(access_token) - all_results: list[dict[str, Any]] = [] + search_options = SearchOptions(path="", max_results=100) - try: - # Use Dropbox search_v2 API which searches both filename and content - from dropbox.files import SearchV2Arg - - # Search in personal namespace - search_arg = SearchV2Arg(query=query) - result = await asyncio.to_thread(dbx.files_search_v2, search_arg) - - def process_matches(matches): - for match in matches: - if hasattr(match, "metadata") and hasattr(match.metadata, "metadata"): - entry = match.metadata.metadata - is_file = isinstance(entry, FileMetadata) - - data = { - "id": entry.id, - "name": entry.name, - "path_lower": entry.path_lower, - "path_display": entry.path_display, - ".tag": "file" if is_file else "folder", - "namespace_type": "personal", - } - - if is_file: - data.update( - { - "size": entry.size, - "rev": entry.rev, - "content_hash": entry.content_hash, - "client_modified": entry.client_modified.isoformat() - if entry.client_modified - else None, - "server_modified": entry.server_modified.isoformat() - if entry.server_modified - else None, - } - ) - - all_results.append(data) - - if hasattr(result, "matches"): - process_matches(result.matches) - - # Handle pagination - while hasattr(result, "has_more") and result.has_more: - if hasattr(result, "cursor"): - search_arg = SearchV2Arg(query=query, cursor=result.cursor) - result = await asyncio.to_thread(dbx.files_search_v2, search_arg) - if hasattr(result, "matches"): - process_matches(result.matches) - else: - break + search_result = await asyncio.to_thread( + dbx.files_search_v2, + query=query, + options=search_options, + ) - # Also search in team namespaces if available - try: - team_namespaces = await asyncio.to_thread(dbx.team_namespaces_list) - for ns in team_namespaces.namespaces: - dbx_ns = self._get_dbx_with_namespace(access_token, ns.namespace_id) - search_arg = SearchV2Arg(query=query) - ns_result = await asyncio.to_thread( - dbx_ns.files_search_v2, search_arg + logger.debug("DROPBOX SEARCH MATCHES: %s", len(search_result.matches)) + logger.debug("DROPBOX SEARCH HAS MORE: %s", search_result.has_more) + logger.debug("DROPBOX SEARCH CURSOR: %s", search_result.cursor) + + all_matches: list[dict[str, Any]] = [] + + while True: + for match in search_result.matches: + metadata = match.metadata.get_metadata() + + if isinstance(metadata, FileMetadata): + all_matches.append( + { + "id": metadata.id, + "name": metadata.name, + "path_lower": metadata.path_lower, + "path_display": metadata.path_display, + "tag": "file", + "size": metadata.size, + "rev": metadata.rev, + "content_hash": metadata.content_hash, + "client_modified": metadata.client_modified.isoformat() + if metadata.client_modified + else None, + "server_modified": metadata.server_modified.isoformat() + if metadata.server_modified + else None, + } + ) + + elif isinstance(metadata, FolderMetadata): + all_matches.append( + { + "id": metadata.id, + "name": metadata.name, + "path_lower": metadata.path_lower, + "path_display": metadata.path_display, + "tag": "folder", + } + ) + + if search_result.has_more and search_result.cursor: + search_result = await asyncio.to_thread( + dbx.files_search_continue_v2, + search_result.cursor, ) + else: + break - # Capture loop variables to avoid B023 error - namespace_id = ns.namespace_id - namespace_name = ns.name - - def process_ns_matches(matches): - for match in matches: - if hasattr(match, "metadata") and hasattr( - match.metadata, "metadata" - ): - entry = match.metadata.metadata - is_file = isinstance(entry, FileMetadata) - - data = { - "id": entry.id, - "name": entry.name, - "path_lower": entry.path_lower, - "path_display": entry.path_display, - ".tag": "file" if is_file else "folder", - "namespace_type": "team", - "namespace_id": namespace_id, - "namespace_name": namespace_name, - } - - if is_file: - data.update( - { - "size": entry.size, - "rev": entry.rev, - "content_hash": entry.content_hash, - "client_modified": entry.client_modified.isoformat() - if entry.client_modified - else None, - "server_modified": entry.server_modified.isoformat() - if entry.server_modified - else None, - } - ) - - all_results.append(data) - - if hasattr(ns_result, "matches"): - process_ns_matches(ns_result.matches) - - # Handle pagination for namespace search - while hasattr(ns_result, "has_more") and ns_result.has_more: - if hasattr(ns_result, "cursor"): - search_arg = SearchV2Arg( - query=query, cursor=ns_result.cursor - ) - ns_result = await asyncio.to_thread( - dbx_ns.files_search_v2, search_arg - ) - if hasattr(ns_result, "matches"): - process_ns_matches(ns_result.matches) - else: - break - except (ApiError, AttributeError) as e: - logger.debug(f"Team namespaces search not available: {e}") + return all_matches except ApiError as e: - logger.error(f"Dropbox search error: {e}") - # Fallback: if search_v2 is not available, return empty results - # The search_v2 API requires Dropbox Business/Professional accounts + logger.error(f"Dropbox API error during search: {e}") return [] - return all_results + except Exception as e: + logger.error(f"Error searching Dropbox: {e}") + return [] diff --git a/backend/app/services/integrations_service.py b/backend/app/services/integrations_service.py index 07f7803c0d..a531269b43 100644 --- a/backend/app/services/integrations_service.py +++ b/backend/app/services/integrations_service.py @@ -27,7 +27,6 @@ async def connect_google_drive_with_tokens( user_id: uuid.UUID | None = None, session: Session | None = None, ) -> ExternalAccount: - """Connect Google Drive account using provided tokens directly""" if not access_token: raise ValueError("Access token is required") @@ -47,11 +46,9 @@ async def connect_google_drive_with_tokens( else: user_info = token_info - # User ID is required if not user_id: raise ValueError("User ID is required") - # Check if account already exists own = False if session is None: session = Session(get_engine()) @@ -65,7 +62,6 @@ async def connect_google_drive_with_tokens( existing_account = session.exec(stmt).first() if existing_account: - # Update existing account existing_account.access_token = access_token existing_account.refresh_token = ( refresh_token or existing_account.refresh_token @@ -79,7 +75,6 @@ async def connect_google_drive_with_tokens( session.refresh(existing_account) return existing_account - # Create new account account = ExternalAccount( user_id=user_id, provider=EXTERNAL_ACCOUNT_PROVIDER.GOOGLE_DRIVE, @@ -98,10 +93,9 @@ async def connect_google_drive_with_tokens( session.close() async def _get_google_user_info(self, access_token: str) -> dict[str, Any]: - """Get user information from Google using access token""" async with httpx.AsyncClient(timeout=10.0) as client: response = await client.get( - "https://www.googleapis.com/oauth2/v2/userinfo", + f"{settings.GOOGLE_DRIVE_URL}/oauth2/v2/userinfo", headers={"Authorization": f"Bearer {access_token}"}, ) if response.status_code != 200: @@ -115,14 +109,13 @@ async def refresh_google_drive_token( account: ExternalAccount, session: Session | None = None, ) -> ExternalAccount: - """Refresh Google Drive access token using refresh token""" if not account.refresh_token: raise ValueError("No refresh token available") if not settings.GOOGLE_CLIENT_ID or not settings.GOOGLE_CLIENT_SECRET: raise ValueError("Google OAuth2 credentials not configured") - token_url = "https://oauth2.googleapis.com/token" + token_url = f"{settings.GOOGLE_DRIVE_URL}/oauth2/token" token_data = { "client_id": settings.GOOGLE_CLIENT_ID, "client_secret": settings.GOOGLE_CLIENT_SECRET, @@ -165,7 +158,6 @@ async def get_google_drive_account( user_id: uuid.UUID, session: Session | None = None, ) -> ExternalAccount | None: - """Get Google Drive account for user""" own = False if session is None: session = Session(get_engine()) @@ -185,7 +177,6 @@ async def get_google_drive_account( async def _ensure_valid_token( self, account: ExternalAccount, session: Session | None = None ) -> str: - """Ensure access token is valid, refresh if needed""" if account.expires_at and account.expires_at <= datetime.utcnow(): if account.refresh_token: account = await self.refresh_google_drive_token( @@ -206,32 +197,27 @@ async def upload_file_to_google_drive( parent_folder_id: str | None = None, session: Session | None = None, ) -> dict[str, Any]: - """Upload a file to Google Drive""" account = await self.get_google_drive_account(user_id, session=session) if not account: raise ValueError("Google Drive account not connected") access_token = await self._ensure_valid_token(account, session=session) - # Upload file metadata first metadata: dict[str, Any] = { "name": file_name, } if parent_folder_id: metadata["parents"] = [parent_folder_id] - # Create multipart upload boundary = secrets.token_urlsafe(16) body_parts: list[str | bytes] = [] - # Metadata part body_parts.append( f"--{boundary}\r\n" f"Content-Type: application/json; charset=UTF-8\r\n\r\n" f"{json.dumps(metadata)}\r\n" ) - # File content part body_parts.append(f"--{boundary}\r\nContent-Type: {mime_type}\r\n\r\n") body_parts.append(file_content) body_parts.append(f"\r\n--{boundary}--\r\n") @@ -241,7 +227,7 @@ async def upload_file_to_google_drive( for part in body_parts ) - url = "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart" + url = f"{settings.GOOGLE_DRIVE_URL}/upload/drive/v3/files?uploadType=multipart" headers = { "Authorization": f"Bearer {access_token}", "Content-Type": f"multipart/related; boundary={boundary}", @@ -265,7 +251,6 @@ async def list_google_drive_files( query: str | None = None, session: Session | None = None, ) -> dict[str, Any]: - """List all files in Google Drive""" account = await self.get_google_drive_account(user_id, session=session) if not account: raise ValueError("Google Drive account not connected") @@ -281,7 +266,7 @@ async def list_google_drive_files( if query: params["q"] = query - url = "https://www.googleapis.com/drive/v3/files" + url = f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files" headers = {"Authorization": f"Bearer {access_token}"} async with httpx.AsyncClient(timeout=30.0) as client: @@ -300,15 +285,13 @@ async def read_google_drive_file( file_id: str, session: Session | None = None, ) -> dict[str, Any]: - """Read file content from Google Drive""" account = await self.get_google_drive_account(user_id, session=session) if not account: raise ValueError("Google Drive account not connected") access_token = await self._ensure_valid_token(account, session=session) - # First get file metadata - metadata_url = f"https://www.googleapis.com/drive/v3/files/{file_id}" + metadata_url = f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files/{file_id}" metadata_headers = {"Authorization": f"Bearer {access_token}"} metadata_params = { "fields": "id, name, mimeType, size, createdTime, modifiedTime, webViewLink" @@ -327,9 +310,8 @@ async def read_google_drive_file( file_metadata = metadata_response.json() - # Get file content content_url = ( - f"https://www.googleapis.com/drive/v3/files/{file_id}?alt=media" + f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files/{file_id}?alt=media" ) content_headers = {"Authorization": f"Bearer {access_token}"} @@ -342,7 +324,6 @@ async def read_google_drive_file( content_type = content_response.headers.get( "Content-Type", "application/octet-stream" ) - # Base64 encode content for JSON response content_base64 = base64.b64encode(content_response.content).decode("utf-8") return { @@ -358,15 +339,13 @@ async def download_google_drive_file( file_id: str, session: Session | None = None, ) -> tuple[bytes, str, dict[str, Any]]: - """Download file content from Google Drive (returns raw bytes for streaming)""" account = await self.get_google_drive_account(user_id, session=session) if not account: raise ValueError("Google Drive account not connected") access_token = await self._ensure_valid_token(account, session=session) - # Get file metadata - metadata_url = f"https://www.googleapis.com/drive/v3/files/{file_id}" + metadata_url = f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files/{file_id}" metadata_headers = {"Authorization": f"Bearer {access_token}"} metadata_params = { "fields": "id, name, mimeType, size, createdTime, modifiedTime" @@ -385,9 +364,8 @@ async def download_google_drive_file( file_metadata = metadata_response.json() - # Get file content content_url = ( - f"https://www.googleapis.com/drive/v3/files/{file_id}?alt=media" + f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files/{file_id}?alt=media" ) content_headers = {"Authorization": f"Bearer {access_token}"} @@ -416,14 +394,12 @@ async def update_google_drive_file( mime_type: str | None = None, session: Session | None = None, ) -> dict[str, Any]: - """Update file content and/or metadata in Google Drive""" account = await self.get_google_drive_account(user_id, session=session) if not account: raise ValueError("Google Drive account not connected") access_token = await self._ensure_valid_token(account, session=session) - # If updating both content and metadata, use multipart upload if file_content is not None and ( file_name is not None or mime_type is not None ): @@ -434,7 +410,6 @@ async def update_google_drive_file( boundary = secrets.token_urlsafe(16) body_parts: list[str | bytes] = [] - # Metadata part if metadata: body_parts.append( f"--{boundary}\r\n" @@ -442,7 +417,6 @@ async def update_google_drive_file( f"{json.dumps(metadata)}\r\n" ) - # File content part content_type = mime_type or "application/octet-stream" body_parts.append(f"--{boundary}\r\nContent-Type: {content_type}\r\n\r\n") body_parts.append(file_content) @@ -453,7 +427,7 @@ async def update_google_drive_file( for part in body_parts ) - url = f"https://www.googleapis.com/upload/drive/v3/files/{file_id}?uploadType=multipart" + url = f"{settings.GOOGLE_DRIVE_URL}/upload/drive/v3/files/{file_id}?uploadType=multipart" headers = { "Authorization": f"Bearer {access_token}", "Content-Type": f"multipart/related; boundary={boundary}", @@ -469,9 +443,8 @@ async def update_google_drive_file( multipart_result: dict[str, Any] = response.json() return multipart_result - # If only updating content elif file_content is not None: - url = f"https://www.googleapis.com/upload/drive/v3/files/{file_id}?uploadType=media" + url = f"{settings.GOOGLE_DRIVE_URL}/upload/drive/v3/files/{file_id}?uploadType=media" headers = { "Authorization": f"Bearer {access_token}", "Content-Type": mime_type or "application/octet-stream", @@ -491,10 +464,9 @@ async def update_google_drive_file( content_result: dict[str, Any] = response.json() return content_result - # If only updating metadata elif file_name is not None: metadata = {"name": file_name} - url = f"https://www.googleapis.com/drive/v3/files/{file_id}" + url = f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files/{file_id}" headers = { "Authorization": f"Bearer {access_token}", "Content-Type": "application/json", @@ -522,37 +494,46 @@ async def search_google_drive_files( search_in_content: bool = True, session: Session | None = None, ) -> list[dict[str, Any]]: - """ - Search files in Google Drive using native search API. - This searches both filename and content efficiently using Google's indexed search. - """ account = await self.get_google_drive_account(user_id, session=session) if not account: raise ValueError("Google Drive account not connected") access_token = await self._ensure_valid_token(account, session=session) - headers = {"Authorization": f"Bearer {access_token}"} + headers = { + "Authorization": f"Bearer {access_token}", + "Accept": "application/json", + } - all_results: list[dict[str, Any]] = [] + results: list[dict[str, Any]] = [] - # Google Drive search query: search in both name and fullText (content) - # The fullText contains operator searches within file contents for supported file types - # Escape single quotes in the query by replacing them with escaped version escaped_query = query.replace("'", "\\'") + if search_in_content: - search_query = f"name contains '{escaped_query}' or fullText contains '{escaped_query}'" + q = ( + f"(name contains '{escaped_query}' " + f"or fullText contains '{escaped_query}') " + f"and trashed = false" + ) else: - search_query = f"name contains '{escaped_query}'" + q = f"name contains '{escaped_query}' and trashed = false" - url = "https://www.googleapis.com/drive/v3/files" params: dict[str, Any] = { - "q": search_query, + "q": q, "pageSize": 100, - "fields": "nextPageToken, files(id, name, mimeType, size, createdTime, modifiedTime, webViewLink, webContentLink)", + "fields": ( + "nextPageToken, " + "files(id, name, mimeType, size, " + "createdTime, modifiedTime, " + "webViewLink, webContentLink, parents)" + ), + "supportsAllDrives": True, + "includeItemsFromAllDrives": True, + "corpora": "user", } async with httpx.AsyncClient(timeout=30.0) as client: - page_token = None + page_token: str | None = None + while True: if page_token: params["pageToken"] = page_token @@ -560,21 +541,45 @@ async def search_google_drive_files( params.pop("pageToken", None) try: - response = await client.get(url, headers=headers, params=params) - if response.status_code != 200: - error_detail = response.text - logger.error(f"Failed to search Google Drive files: {error_detail}") - break + resp = await client.get( + f"{settings.GOOGLE_DRIVE_URL}/drive/v3/files", + headers=headers, + params=params, + ) - result: dict[str, Any] = response.json() - files = result.get("files", []) - all_results.extend(files) + if resp.status_code != 200: + logger.error("Google Drive search failed: %s", resp.text) + break - page_token = result.get("nextPageToken") + data = resp.json() + + for item in data.get("files", []): + results.append( + { + "id": item.get("id"), + "name": item.get("name"), + "mime_type": item.get("mimeType"), + "size": item.get("size"), + "created_time": item.get("createdTime"), + "modified_time": item.get("modifiedTime"), + "web_url": item.get("webViewLink"), + "download_url": item.get("webContentLink"), + "provider": "google_drive", + "type": ( + "folder" + if item.get("mimeType") + == "application/vnd.google-apps.folder" + else "file" + ), + } + ) + + page_token = data.get("nextPageToken") if not page_token: break - except Exception as e: - logger.error(f"Error searching Google Drive: {e}") + + except Exception: + logger.exception("Error searching Google Drive") break - return all_results + return results diff --git a/backend/app/services/one_drive_service.py b/backend/app/services/one_drive_service.py index 3ffa637d6a..907e12bc51 100644 --- a/backend/app/services/one_drive_service.py +++ b/backend/app/services/one_drive_service.py @@ -7,6 +7,7 @@ import httpx from sqlmodel import Session, select +from app.core.config import settings from app.core.db import get_engine from app.enums.external_account_enum import EXTERNAL_ACCOUNT_PROVIDER from app.models.external_account import ExternalAccount @@ -25,7 +26,6 @@ async def connect_one_drive_with_tokens( user_id: uuid.UUID | None = None, session: Session | None = None, ) -> ExternalAccount: - """Connect One Drive account using provided tokens directly""" if not access_token: raise ValueError("Access token is required") @@ -47,11 +47,9 @@ async def connect_one_drive_with_tokens( else: user_info = token_info - # User ID is required if not user_id: raise ValueError("User ID is required") - # Check if account already exists own_session = None if session is None: own_session = Session(get_engine()) @@ -65,7 +63,6 @@ async def connect_one_drive_with_tokens( existing_account = session.exec(stmt).first() if existing_account: - # Update existing account existing_account.access_token = access_token existing_account.expires_at = expires_at existing_account.provider_account_id = provider_account_id @@ -76,7 +73,6 @@ async def connect_one_drive_with_tokens( session.refresh(existing_account) return existing_account - # Create new account account = ExternalAccount( user_id=user_id, provider=EXTERNAL_ACCOUNT_PROVIDER.ONE_DRIVE, @@ -94,10 +90,9 @@ async def connect_one_drive_with_tokens( own_session.close() async def _get_one_drive_user_info(self, access_token: str) -> dict[str, Any]: - """Get user information from Microsoft Graph API using access token""" async with httpx.AsyncClient(timeout=10.0) as client: response = await client.get( - "https://graph.microsoft.com/v1.0/me", + f"{settings.MICROSOFT_URL}/v1.0/me", headers={"Authorization": f"Bearer {access_token}"}, ) if response.status_code != 200: @@ -109,12 +104,10 @@ async def _get_one_drive_user_info(self, access_token: str) -> dict[str, Any]: async def _ensure_valid_token( self, account: ExternalAccount, session: Session | None = None ) -> str: - """Ensure the access token is valid, refresh if necessary""" if account.expires_at and account.expires_at > datetime.utcnow(): return account.access_token or "" if not account.refresh_token: raise ValueError("No refresh token available") - # TODO: Implement token refresh logic for Microsoft if not account.access_token: raise ValueError("No access token available") return account.access_token @@ -124,7 +117,6 @@ async def get_all_tenants( user_id: uuid.UUID, session: Session | None = None, ) -> list[dict[str, Any]]: - """Get all sites/tenants from Microsoft Graph API including personal OneDrive""" account = await self.get_one_drive_account(user_id, session=session) if not account: raise ValueError("OneDrive account not connected") @@ -135,10 +127,9 @@ async def get_all_tenants( async with httpx.AsyncClient(timeout=30.0) as client: headers = {"Authorization": f"Bearer {access_token}"} - # Get user's personal OneDrive try: response = await client.get( - "https://graph.microsoft.com/v1.0/me/drive", + f"{settings.MICROSOFT_GRAPH_URL}/v1.0/me/drive", headers=headers, ) if response.status_code == 200: @@ -155,8 +146,7 @@ async def get_all_tenants( except Exception as e: logger.error(f"Failed to get personal OneDrive: {e}") - # Get all SharePoint sites - url = "https://graph.microsoft.com/v1.0/sites?search=*" + url = f"{settings.MICROSOFT_GRAPH_URL}/v1.0/sites?search=*" while url: try: response = await client.get(url, headers=headers) @@ -168,10 +158,9 @@ async def get_all_tenants( for site in data.get("value", []): site_id = site.get("id") if site_id: - # Get drive for each site try: drive_response = await client.get( - f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive", + f"{settings.MICROSOFT_GRAPH_URL}/v1.0/sites/{site_id}/drive", headers=headers, ) if drive_response.status_code == 200: @@ -185,7 +174,6 @@ async def get_all_tenants( tenants.extend(data.get("value", [])) - # Check for next page url = data.get("@odata.nextLink") except Exception as e: logger.error(f"Error fetching sites: {e}") @@ -199,7 +187,6 @@ async def get_files_for_tenant( site_id: str, session: Session | None = None, ) -> list[dict[str, Any]]: - """Get all files for a specific tenant/site (recursively)""" account = await self.get_one_drive_account(user_id, session=session) if not account: raise ValueError("OneDrive account not connected") @@ -209,16 +196,12 @@ async def get_files_for_tenant( all_files = [] headers = {"Authorization": f"Bearer {access_token}"} - # Determine base URL based on site_id if site_id == "personal" or site_id.startswith("drive-"): - # Personal OneDrive - base_url = "https://graph.microsoft.com/v1.0/me/drive" + base_url = f"{settings.MICROSOFT_GRAPH_URL}/v1.0/me/drive" else: - # SharePoint site - base_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive" + base_url = f"{settings.MICROSOFT_GRAPH_URL}/v1.0/sites/{site_id}/drive" async def get_files_recursive(folder_id: str = "root") -> None: - """Recursively get all files from a folder""" url = ( f"{base_url}/items/{folder_id}/children" if folder_id != "root" @@ -241,22 +224,18 @@ async def get_files_recursive(folder_id: str = "root") -> None: items = data.get("value", []) for item in items: - # Add file to results all_files.append(item) - # If it's a folder, recursively get its contents if item.get("folder"): item_id = item.get("id") if item_id: await get_files_recursive(folder_id=item_id) - # Check for next page current_url = data.get("@odata.nextLink") except Exception as e: logger.error(f"Error fetching files from {current_url}: {e}") break - # Start recursive file fetching await get_files_recursive() return all_files @@ -266,7 +245,6 @@ async def get_all_files_with_tenants( user_id: uuid.UUID, session: Session | None = None, ) -> dict[str, Any]: - """Get all files organized by tenant""" tenants = await self.get_all_tenants(user_id, session=session) result: dict[str, Any] = { "tenants": [], @@ -280,7 +258,6 @@ async def get_all_files_with_tenants( continue try: - # Use drive ID for personal OneDrive, site ID for SharePoint if drive_type == "personal": drive_id = tenant.get("drive", {}).get("id") tenant_id = drive_id or "personal" @@ -314,7 +291,6 @@ async def get_one_drive_account( user_id: uuid.UUID, session: Session | None = None, ) -> ExternalAccount | None: - """Get OneDrive account for user""" own_session = None if session is None: own_session = Session(get_engine()) @@ -337,17 +313,14 @@ async def upload_file_to_one_drive( file_name: str, file_content: bytes, ) -> dict[str, Any]: - """Upload a file to One Drive""" account = await self.get_one_drive_account(user_id) if not account: raise ValueError("OneDrive account not connected") access_token = await self._ensure_valid_token(account) - # Microsoft Graph API requires the filename to be URL-encoded in the path - # Format: /me/drive/root:/{filename}:/content encoded_filename = quote(file_name) - url = f"https://graph.microsoft.com/v1.0/me/drive/root:/{encoded_filename}:/content" + url = f"{settings.MICROSOFT_GRAPH_URL}/v1.0/me/drive/root:/{encoded_filename}:/content" headers = { "Authorization": f"Bearer {access_token}", @@ -377,93 +350,73 @@ async def search_files( search_in_content: bool = True, session: Session | None = None, ) -> list[dict[str, Any]]: - """ - Search files in OneDrive using native Microsoft Graph search API. - This searches both filename and content efficiently using Microsoft's indexed search. - """ account = await self.get_one_drive_account(user_id, session=session) if not account: raise ValueError("OneDrive account not connected") access_token = await self._ensure_valid_token(account, session=session) - headers = {"Authorization": f"Bearer {access_token}"} + headers = { + "Authorization": f"Bearer {access_token}", + "Accept": "application/json", + } - all_results: list[dict[str, Any]] = [] + results: list[dict[str, Any]] = [] async with httpx.AsyncClient(timeout=30.0) as client: - # Search in personal OneDrive - # Microsoft Graph search API searches both filename and content - # URL encode the query parameter - encoded_query = quote(query, safe="") - search_url = f"https://graph.microsoft.com/v1.0/me/drive/root/search(q='{encoded_query}')" - - try: - response = await client.get(search_url, headers=headers) - if response.status_code == 200: - data = response.json() - items = data.get("value", []) - for item in items: - item["tenant"] = {"driveType": "personal", "name": "Personal OneDrive"} - all_results.append(item) - - # Handle pagination - next_link = data.get("@odata.nextLink") - while next_link: - response = await client.get(next_link, headers=headers) - if response.status_code == 200: - data = response.json() - items = data.get("value", []) - for item in items: - item["tenant"] = {"driveType": "personal", "name": "Personal OneDrive"} - all_results.append(item) - next_link = data.get("@odata.nextLink") - else: - break - except Exception as e: - logger.error(f"Error searching personal OneDrive: {e}") - # Also search in SharePoint sites/tenants - try: - tenants = await self.get_all_tenants(user_id, session=session) - for tenant in tenants: - site_id = tenant.get("id") - drive_type = tenant.get("driveType", "sharepoint") - - if drive_type == "personal": - continue # Already searched above - - if not site_id: - continue + async def fetch_search(url: str, tenant: dict): + while url: + resp = await client.get(url, headers=headers) + if resp.status_code != 200: + logger.warning("OneDrive search failed: %s", resp.text) + return + + data = resp.json() + for item in data.get("value", []): + if not search_in_content: + if query.lower() not in item.get("name", "").lower(): + continue + + results.append( + { + "id": item.get("id"), + "name": item.get("name"), + "path": item.get("parentReference", {}).get("path"), + "web_url": item.get("webUrl"), + "size": item.get("size"), + "provider": "onedrive", + "type": "file" if "file" in item else "folder", + "last_modified": item.get("lastModifiedDateTime"), + "tenant": tenant, + } + ) - try: - # Search in SharePoint site drive - # URL encode the query parameter - encoded_query = quote(query, safe="") - site_search_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root/search(q='{encoded_query}')" - response = await client.get(site_search_url, headers=headers) - if response.status_code == 200: - data = response.json() - items = data.get("value", []) - for item in items: - item["tenant"] = tenant - all_results.append(item) - - # Handle pagination - next_link = data.get("@odata.nextLink") - while next_link: - response = await client.get(next_link, headers=headers) - if response.status_code == 200: - data = response.json() - items = data.get("value", []) - for item in items: - item["tenant"] = tenant - all_results.append(item) - next_link = data.get("@odata.nextLink") - else: - break - except Exception as e: - logger.debug(f"Error searching tenant {site_id}: {e}") - except Exception as e: - logger.debug(f"Error getting tenants for search: {e}") + url = data.get("@odata.nextLink") + + personal_url = ( + f"{settings.MICROSOFT_GRAPH_URL}/v1.0/me/drive/root/search(q='{query}')" + ) + + await fetch_search( + personal_url, + {"driveType": "personal", "name": "Personal OneDrive"}, + ) + + tenants = await self.get_all_tenants(user_id, session=session) + + for tenant in tenants: + if tenant.get("driveType") == "personal": + continue + + site_id = tenant.get("id") + if not site_id: + continue + + site_url = ( + f"{settings.MICROSOFT_GRAPH_URL}/v1.0/sites/{site_id}" + f"/drive/root/search(q='{query}')" + ) + + await fetch_search(site_url, tenant) - return all_results + return results diff --git a/backend/app/services/search_service.py b/backend/app/services/search_service.py index e015d1f5ed..9d30435c9a 100644 --- a/backend/app/services/search_service.py +++ b/backend/app/services/search_service.py @@ -1,4 +1,3 @@ -import asyncio import logging import uuid from typing import Any @@ -22,8 +21,6 @@ async def search_all_providers( self, user_id: uuid.UUID, search_query: str, - search_in_content: bool = True, - max_file_size: int = 10 * 1024 * 1024, # 10MB default session: Session | None = None, ) -> dict[str, Any]: if not search_query or not search_query.strip(): @@ -39,46 +36,26 @@ async def search_all_providers( search_query_lower = search_query.lower().strip() - # Search all providers in parallel - dropbox_task = self._search_dropbox( - user_id, search_query_lower, search_in_content, max_file_size, session - ) - one_drive_task = self._search_one_drive( - user_id, search_query_lower, search_in_content, max_file_size, session - ) - google_drive_task = self._search_google_drive( - user_id, search_query_lower, search_in_content, max_file_size, session - ) - - dropbox_results, one_drive_results, google_drive_results = await asyncio.gather( - dropbox_task, one_drive_task, google_drive_task, return_exceptions=True - ) - - # Handle exceptions - if isinstance(dropbox_results, Exception): - logger.error(f"Dropbox search error: {dropbox_results}") - dropbox_results = {"files": [], "total": 0, "error": str(dropbox_results)} - if isinstance(one_drive_results, Exception): - logger.error(f"OneDrive search error: {one_drive_results}") - one_drive_results = { - "files": [], - "total": 0, - "error": str(one_drive_results), - } - if isinstance(google_drive_results, Exception): - logger.error(f"Google Drive search error: {google_drive_results}") - google_drive_results = { - "files": [], - "total": 0, - "error": str(google_drive_results), + try: + dropbox_results = await self._search_dropbox( + user_id, search_query_lower, session + ) + one_drive_results = await self._search_one_drive( + user_id, search_query_lower, session + ) + google_drive_results = await self._search_google_drive( + user_id, search_query_lower, session + ) + except Exception as e: + logger.error(f"Search error: {e}") + return { + "query": search_query, + "results": { + "dropbox": {"files": [], "total": 0, "error": str(e)}, + "one_drive": {"files": [], "total": 0, "error": str(e)}, + "google_drive": {"files": [], "total": 0, "error": str(e)}, + }, } - - total_files = ( - dropbox_results.get("total", 0) - + one_drive_results.get("total", 0) - + google_drive_results.get("total", 0) - ) - return { "query": search_query, "results": { @@ -86,30 +63,23 @@ async def search_all_providers( "one_drive": one_drive_results, "google_drive": google_drive_results, }, - "total_files": total_files, } async def _search_dropbox( self, user_id: uuid.UUID, search_query: str, - search_in_content: bool, - max_file_size: int, session: Session | None = None, ) -> dict[str, Any]: try: - # Use native Dropbox search API (searches both filename and content) matching_files = await self.dropbox_service.search_files( user_id=user_id, query=search_query, - search_in_content=search_in_content, session=session, ) - - # Add provider and match_type to each result + logger.debug(f"DROPBOX SEARCH RESULTS: {len(matching_files)} files found") for file in matching_files: file["provider"] = "dropbox" - # Dropbox search API searches both filename and content, so we mark as "both" file["match_type"] = "both" return { @@ -125,23 +95,17 @@ async def _search_one_drive( self, user_id: uuid.UUID, search_query: str, - search_in_content: bool, - max_file_size: int, session: Session | None = None, ) -> dict[str, Any]: try: - # Use native Microsoft Graph search API (searches both filename and content) matching_files = await self.one_drive_service.search_files( user_id=user_id, query=search_query, - search_in_content=search_in_content, session=session, ) - # Add provider and match_type to each result for file in matching_files: file["provider"] = "one_drive" - # Microsoft Graph search API searches both filename and content, so we mark as "both" file["match_type"] = "both" return { @@ -157,23 +121,19 @@ async def _search_google_drive( self, user_id: uuid.UUID, search_query: str, - search_in_content: bool, - max_file_size: int, session: Session | None = None, ) -> dict[str, Any]: try: - # Use native Google Drive search API (searches both filename and content) matching_files = await self.google_drive_service.search_google_drive_files( user_id=user_id, query=search_query, - search_in_content=search_in_content, session=session, ) - - # Add provider and match_type to each result + logger.debug( + f"GOOGLE DRIVE SEARCH RESULTS: {len(matching_files)} files found" + ) for file in matching_files: file["provider"] = "google_drive" - # Google Drive search API searches both filename and content, so we mark as "both" file["match_type"] = "both" return { diff --git a/backend/tests/api/routes/test_dropbox.py b/backend/tests/api/routes/test_dropbox.py index 9ce9464482..4c5b6dba4f 100644 --- a/backend/tests/api/routes/test_dropbox.py +++ b/backend/tests/api/routes/test_dropbox.py @@ -59,9 +59,13 @@ def test_connect_dropbox_with_tokens_success( self, client: TestClient, auth_headers: dict[str, str], test_user: User, db: Session ): """Test successful Dropbox connection.""" + # expires_in should be ISO 8601 format string for Dropbox (not integer) + expires_at = (datetime.utcnow() + timedelta(seconds=3600)).isoformat() + # Ensure it's a string type + assert isinstance(expires_at, str), "expires_at must be a string" token_data = { "access_token": "new_token", - "expires_in": 3600, + "expires_in": expires_at, # ISO 8601 string format "token_type": "Bearer", "refresh_token": "refresh_token", "scope": "files.content.read files.content.write", diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 1004515bc6..daf965b6be 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -10,6 +10,7 @@ from app.core.db import get_engine from app.enums.user_enum import UserRole from app.main import app +from app.models.external_account import ExternalAccount from app.models.otp import OTP from app.models.user import User @@ -87,11 +88,30 @@ def db() -> Generator[Session, None, None]: with Session(get_engine()) as session: yield session # Clean up created rows after the test session - statement = delete(OTP) - session.execute(statement) - statement = delete(User) - session.execute(statement) - session.commit() + # Delete in order to respect foreign key constraints + try: + # Delete OTP first (no foreign key dependencies) + statement = delete(OTP) + session.execute(statement) + session.commit() + except Exception: + session.rollback() + + try: + # Delete ExternalAccount before User (due to foreign key constraint) + statement = delete(ExternalAccount) + session.execute(statement) + session.commit() + except Exception: + session.rollback() + + try: + # Finally delete User + statement = delete(User) + session.execute(statement) + session.commit() + except Exception: + session.rollback() @pytest.fixture(scope="module") diff --git a/backend/tests/unit/test_one_drive_service.py b/backend/tests/unit/test_one_drive_service.py index b0c32f30cd..11ec0e737e 100644 --- a/backend/tests/unit/test_one_drive_service.py +++ b/backend/tests/unit/test_one_drive_service.py @@ -119,6 +119,13 @@ async def test_connect_one_drive_with_tokens_update_existing( db.add(existing_account) db.commit() db.refresh(existing_account) + + # Store the original updated_at timestamp + import time + original_updated_at = existing_account.updated_at + # Add a delay to ensure there's a clear time difference when we update + # The service will set updated_at = datetime.utcnow() on update + time.sleep(0.2) # Increased delay to ensure timestamp difference mock_user_info = {"id": "new_user_123", "displayName": "Updated User"} @@ -149,7 +156,13 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): assert account.id == existing_account.id assert account.access_token == "new_token" assert account.provider_account_id == "new_user_123" - assert account.updated_at > existing_account.updated_at + # The service explicitly sets updated_at = datetime.utcnow() on update + # Verify the account was updated by checking updated_at + # Since we added a delay before the update, updated_at should be greater + db.refresh(account) # Ensure we have the latest from database + assert account.updated_at >= original_updated_at + # With the delay, it should be strictly greater + assert account.updated_at > original_updated_at @pytest.mark.asyncio async def test_connect_one_drive_with_tokens_user_info_failure( @@ -279,16 +292,18 @@ async def test_ensure_valid_token_no_refresh_token(self, service, test_user, db: @pytest.mark.asyncio async def test_ensure_valid_token_no_access_token(self, service, test_user, db: Session): - """Test with no access token.""" + """Test with no access token but with refresh token.""" account = ExternalAccount( user_id=test_user.id, provider=EXTERNAL_ACCOUNT_PROVIDER.ONE_DRIVE, access_token=None, - expires_at=None, + expires_at=None, # Expired or None so it doesn't return early + refresh_token="refresh_token_123", # Has refresh token so it gets past that check ) db.add(account) db.commit() + # With refresh_token present but no access_token, it should raise "No access token available" with pytest.raises(ValueError, match="No access token available"): await service._ensure_valid_token(account, session=db)