diff --git a/client/joinly_client/client.py b/client/joinly_client/client.py
index d64cab7..588524e 100644
--- a/client/joinly_client/client.py
+++ b/client/joinly_client/client.py
@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import contextlib
 import json
 import logging
@@ -19,6 +20,7 @@
     Transcript,
     TranscriptSegment,
     Usage,
+    VideoSnapshot,
 )
 
 from joinly_client.utils import is_async_context, name_in_transcript
@@ -410,6 +412,34 @@ async def send_chat_message(self, message: str) -> None:
             arguments={"message": message},
         )
 
+    async def get_video_snapshot(self) -> VideoSnapshot:
+        """Get a snapshot of the current video feed.
+
+        Returns:
+            VideoSnapshot: The snapshot with raw image data and media type.
+
+        Raises:
+            RuntimeError: If not joined to a meeting, or if the tool
+                result does not contain image data.
+        """
+        if not self.joined:
+            msg = "Not joined to a meeting"
+            raise RuntimeError(msg)
+
+        result = await self.client.call_tool("get_video_snapshot")
+        # Only the first content item is read; fail with a clear error
+        # instead of an IndexError/AttributeError when it is missing.
+        content = result.content[0] if result.content else None
+        data = getattr(content, "data", None)
+        if data is None:
+            msg = "No image data in video snapshot result"
+            raise RuntimeError(msg)
+
+        return VideoSnapshot(
+            data=base64.b64decode(data),
+            media_type=content.mimeType,  # type: ignore[union-attr]
+        )
+
     async def share_screen(self, url: str) -> None:
         """Start sharing screen in the meeting.
 
diff --git a/client/joinly_client/types.py b/client/joinly_client/types.py
index 5324084..adf463e 100644
--- a/client/joinly_client/types.py
+++ b/client/joinly_client/types.py
@@ -13,6 +13,7 @@
     Transcript,
     TranscriptSegment,
     Usage,
+    VideoSnapshot,
 )
 from mcp.types import CallToolResult
 
@@ -26,6 +27,7 @@
     "Transcript",
     "TranscriptSegment",
     "Usage",
+    "VideoSnapshot",
 ]
 
 type ToolExecutor = Callable[[str, dict[str, Any]], Awaitable[Any]]
diff --git a/common/joinly_common/types.py b/common/joinly_common/types.py
index 5247ba0..0b7df67 100644
--- a/common/joinly_common/types.py
+++ b/common/joinly_common/types.py
@@ -1,6 +1,7 @@
 from collections.abc import Iterable
 from decimal import ROUND_HALF_UP, Decimal
 from enum import Enum
+from typing import Literal
 
 from pydantic import (
     BaseModel,
@@ -167,6 +168,18 @@ def compact(self, max_gap: float = 0.5) -> "Transcript":
         return Transcript(segments=compacted)
 
 
+class VideoSnapshot(BaseModel):
+    """A single still image captured from the meeting video feed.
+
+    Attributes:
+        data (bytes): The raw image bytes.
+        media_type (Literal["image/jpeg", "image/png"]): MIME type, JPEG by default.
+    """
+
+    data: bytes
+    media_type: Literal["image/jpeg", "image/png"] = "image/jpeg"
+
+
 class MeetingChatMessage(BaseModel):
     """A class to represent a chat message in a meeting.
 
diff --git a/joinly/types.py b/joinly/types.py
index fa433ef..b77e6dc 100644
--- a/joinly/types.py
+++ b/joinly/types.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import Literal
 
 from joinly_common.types import (
     MeetingChatHistory,
@@ -11,6 +10,7 @@
     Transcript,
     TranscriptSegment,
     Usage,
+    VideoSnapshot,
 )
 
 __all__ = [
@@ -23,6 +23,7 @@
     "Transcript",
     "TranscriptSegment",
     "Usage",
+    "VideoSnapshot",
 ]
 
 
@@ -92,16 +93,3 @@ class SpeechWindow:
     time_ns: int
     is_speech: bool
     speaker: str | None = None
-
-
-@dataclass(frozen=True, slots=True)
-class VideoSnapshot:
-    """A class to represent a snapshot of video data.
-
-    Attributes:
-        data (bytes): The raw video data.
-        media_type (Literal["image/png"]): The media type of the video snapshot.
-    """
-
-    data: bytes
-    media_type: Literal["image/png", "image/jpeg"] = "image/png"
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index 6f33d67..b25de2a 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -18,7 +18,6 @@
 """
 
 import asyncio
-import base64
 import io
 import os
 from collections.abc import AsyncIterator
@@ -75,12 +74,6 @@ def _red_ratio(image_data: bytes) -> float:
     return red_count / len(pixels)
 
 
-async def _snapshot_image(bot: JoinlyClient) -> bytes:
-    """Take a video snapshot and return the raw image bytes."""
-    result = await bot.client.call_tool("get_video_snapshot")
-    return base64.b64decode(result.content[0].data)  # type: ignore[union-attr]
-
-
 async def _transcript_text(bot: JoinlyClient) -> str:
     """Get the full transcript text from a bot, lowercased."""
     transcript = await bot.get_transcript()
@@ -198,19 +191,19 @@ async def test_screen_share(
     # share red page and verify >30% near-pure red pixels
     await bot_a.share_screen(_RED_PAGE)
     await asyncio.sleep(8)
-    ratio = _red_ratio(await _snapshot_image(bot_b))
+    ratio = _red_ratio((await bot_b.get_video_snapshot()).data)
     assert ratio > 0.3, f"Only {ratio:.0%} red pixels during share"  # noqa: PLR2004
 
     # stop and verify red is gone
     await bot_a.stop_sharing()
     await asyncio.sleep(5)
-    ratio = _red_ratio(await _snapshot_image(bot_b))
+    ratio = _red_ratio((await bot_b.get_video_snapshot()).data)
     assert ratio < 0.05, f"Still {ratio:.0%} red after stop"  # noqa: PLR2004
 
     # re-share to verify share works again after stop
     await bot_a.share_screen(_RED_PAGE)
     await asyncio.sleep(5)
-    ratio = _red_ratio(await _snapshot_image(bot_b))
+    ratio = _red_ratio((await bot_b.get_video_snapshot()).data)
     assert ratio > 0.3, f"Only {ratio:.0%} red on re-share"  # noqa: PLR2004
     await bot_a.stop_sharing()
     await asyncio.sleep(2)
@@ -279,7 +272,7 @@ async def test_video_snapshot_is_valid_image(
     """Video snapshot should be a decodable JPEG image of reasonable size."""
     bot_a, _bot_b = bots
 
-    img_bytes = await _snapshot_image(bot_a)
+    img_bytes = (await bot_a.get_video_snapshot()).data
     img = Image.open(io.BytesIO(img_bytes))
 
     assert img.format == "JPEG", f"Expected JPEG, got {img.format}"