Skip to content

How to capture root message of a thread in a Space? #265

@ddebta

Description

@ddebta

By root message I mean the first original message from which a thread starts.

Image

I am trying to capture the root message of the thread (in both 1:1 conversations and Spaces) from which the webhook was triggered.

`starter = api.messages.get( parent_id )` works in a 1:1 conversation but not in a Space, where it throws this error:

Traceback (most recent call last):
File "/app/webex-bot/app.py", line 174, in collect_thread_text_and_attachments
starter = api.messages.get(parent_id)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/api/messages.py", line 339, in get
json_data = self._session.get(API_ENDPOINT + "/" + messageId)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 428, in get
response = self.request("GET", url, erc, params=params, **kwargs)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 393, in request
check_response_code(response, erc)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/utils.py", line 207, in check_response_code
raise ApiError(response)
webexpythonsdk.exceptions.ApiError: [404] Not Found - Unable to get message. [Tracking ID: ROUTERGW_add856f9-8590-471d-a355-3b4b2405fdb9]  

Also, `starter_candidates = list( api.messages.list( roomId = room_id, max = 5, beforeMessage = parent_id ) )` works in a 1:1 conversation but not in a Space, where it throws this error:

Traceback (most recent call last):
File "/app/webex-bot/app.py", line 179, in collect_thread_text_and_attachments
starter_candidates = list( api.messages.list( roomId = room_id, max = 5, beforeMessage = parent_id ) )
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/api/messages.py", line 138, in list
for item in items:
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 502, in get_items
for json_page in pages:
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 455, in get_pages
response = self.request("GET", url, erc, params=params, **kwargs)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 393, in request
check_response_code(response, erc)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/utils.py", line 207, in check_response_code
raise ApiError(response)
webexpythonsdk.exceptions.ApiError: [403] Forbidden - Failed to get activity. [Tracking ID: ROUTERGW_48f845fd-b077-4dab-8ec8-57291972479f] 

This piece works fine to capture the root message of a 1:1 thread but fails to capture the root message of a thread in a Space.

def collect_thread_text_and_attachments(msg) -> tuple[str, list[str]]:
    """
    Collect the text and attachment contents of the thread that ``msg`` belongs to.

    Strategy:
      1) Try api.messages.get(parent_id)
      2) If that fails, scan recent messages in the room (up to MAX_SCAN) for the parent
      3) If still not found, try beforeMessage=parent_id as a last resort, accepting the
         result only when it really is the parent message
      4) Always include replies (list parentId=...) ordered oldest->newest
      5) Ensure the incoming message 'msg' is present
      6) If the starter can't be found, add a placeholder notice

    Every API call is best-effort: a failure is logged and the function carries on
    with whatever it could collect, so it never raises because of a lookup error.

    NOTE(review): the reported 404/403 errors in Spaces look like a permission limit
    on what the bot may read there (e.g. only messages that mention it) - confirm
    against the Webex Messages API documentation; no client-side retry can fix that.

    Args:
        msg: The incoming message object. Only ``id``, ``parentId``, ``roomId``,
            ``personId``, ``text`` and ``files`` are read, all via ``getattr`` with
            defaults, so missing attributes are tolerated.

    Returns:
        ``(thread_text, attachments)``. ``thread_text`` holds one
        ``[author]: text`` line per message, truncated to MAX_CHARS.
        ``attachments`` is ``[]`` when nothing was extracted, otherwise a list with
        a single string containing every attachment block, truncated to MAX_CHARS.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)

    MAX_SCAN = 500  # upper bound on room messages examined while looking for the parent
    MAX_CHARS = 60_000  # guardrail on the size of each returned text

    author_cache = {}
    thread_text_lines = []
    attachment_blocks = []

    def process_single_message(m):
        author = get_display_name(getattr(m, "personId", "unknown"), author_cache)
        mtext = (getattr(m, "text", "") or "").strip()
        if mtext:
            thread_text_lines.append(f"[{author}]: {mtext}")

        for f_url in getattr(m, "files", None) or []:
            # Bind a label before the download so the error branch can always name
            # the attachment; previously a failed download left `fname` unbound
            # (NameError) or pointing at the previous file.
            fname = f_url
            try:
                content, fname, ctype = download_webex_file(f_url)
                extracted = extract_text_from_file(content, fname, ctype)
                attachment_blocks.append(f"[Attachment {fname}]:\n{extracted}")
            except Exception as e:
                # keep going; record the error in attachments so user sees it
                attachment_blocks.append(f"[Attachment error for {fname}]: {e}")

    parent_id = getattr(msg, "parentId", None)
    room_id = getattr(msg, "roomId", None)

    messages_to_process = []
    found_starter = None
    starter_unavailable = False

    if parent_id and room_id:
        # 1) Try to fetch starter directly (works in many cases)
        try:
            found_starter = api.messages.get(parent_id)
        except Exception as exc:
            logger.warning("Could not fetch thread starter %s directly: %s", parent_id, exc)

            # 2) Scan recent messages in the room for that id (paginated by the SDK)
            try:
                for scanned, m in enumerate(api.messages.list(roomId=room_id, max=100), start=1):
                    if getattr(m, "id", None) == parent_id:
                        found_starter = m
                        break
                    if scanned >= MAX_SCAN:
                        break
            except Exception as scan_exc:
                # scanning may also fail due to permissions; fall through to step 3
                logger.warning("Scanning room %s for thread starter failed: %s", room_id, scan_exc)

            # 3) Last resort: beforeMessage. The API lists messages sent *before* the
            #    given id, so the candidate is normally NOT the parent itself; accept
            #    it only on an exact id match so an unrelated message is never
            #    presented as the thread starter.
            if found_starter is None:
                try:
                    candidates = list(api.messages.list(roomId=room_id, max=1, beforeMessage=parent_id))
                except Exception as before_exc:
                    logger.warning("beforeMessage lookup for %s failed: %s", parent_id, before_exc)
                else:
                    if candidates and getattr(candidates[0], "id", None) == parent_id:
                        found_starter = candidates[0]

            if found_starter is None:
                starter_unavailable = True  # note that we couldn't retrieve the starter

        # If we found a starter, add it first
        if found_starter is not None:
            messages_to_process.append(found_starter)

        # Collect replies and reverse them to oldest->newest (the listing is treated
        # as newest-first). No unconditional pop() here: it used to drop the newest
        # reply, or the starter itself when there were no replies. Duplicates are
        # removed by id further down.
        try:
            replies = list(api.messages.list(roomId=room_id, parentId=parent_id, max=100))
        except Exception as exc:
            # if replies cannot be fetched, continue - we'll at least include incoming message
            logger.warning("Could not list replies for thread %s: %s", parent_id, exc)
        else:
            replies.reverse()
            messages_to_process.extend(replies)

        # Ensure incoming 'msg' is present (sometimes it's not in replies list)
        incoming_id = getattr(msg, "id", None)
        if not any(getattr(m, "id", None) == incoming_id for m in messages_to_process):
            messages_to_process.append(msg)

        # If we couldn't find the starter, put a placeholder first (so LLM sees lack of context).
        # thread_text_lines is still empty here, so this line ends up at the top.
        if starter_unavailable:
            thread_text_lines.append("[Starter message unavailable — bot may have joined after the thread started or lacks permission to read the original message.]")

    else:
        # Not a thread or missing metadata: just process the single message
        messages_to_process = [msg]

    # Now process messages in order, skipping ids already seen
    # (messages without an id are always processed)
    seen_ids = set()
    for m in messages_to_process:
        mid = getattr(m, "id", None)
        if mid and mid in seen_ids:
            continue
        if mid:
            seen_ids.add(mid)
        process_single_message(m)

    # Combine, guardrail sizes
    thread_text = "\n".join(thread_text_lines)
    if len(thread_text) > MAX_CHARS:
        thread_text = thread_text[:MAX_CHARS] + "\n...[truncated]"

    att_text = "\n\n".join(attachment_blocks)
    if len(att_text) > MAX_CHARS:
        att_text = att_text[:MAX_CHARS] + "\n...[attachments truncated]"

    return thread_text, ([att_text] if att_text else [])

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions