-
Notifications
You must be signed in to change notification settings - Fork 153
Open
Description
By "root message" I mean the original first message from which a thread starts.

In an attempt to capture the root message of a thread (both 1:1 and Space) from which the webhook was called,
starter = api.messages.get( parent_id )
works in a 1:1 conversation but not in a Space, where it throws this error:
Traceback (most recent call last):
File "/app/webex-bot/app.py", line 174, in collect_thread_text_and_attachments
starter = api.messages.get(parent_id)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/api/messages.py", line 339, in get
json_data = self._session.get(API_ENDPOINT + "/" + messageId)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 428, in get
response = self.request("GET", url, erc, params=params, **kwargs)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 393, in request
check_response_code(response, erc)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/utils.py", line 207, in check_response_code
raise ApiError(response)
webexpythonsdk.exceptions.ApiError: [404] Not Found - Unable to get message. [Tracking ID: ROUTERGW_add856f9-8590-471d-a355-3b4b2405fdb9]
Also, starter_candidates = list( api.messages.list( roomId = room_id, max = 5, beforeMessage = parent_id ) )
works in a 1:1 conversation but not in a Space, where it throws this error:
Traceback (most recent call last):
File "/app/webex-bot/app.py", line 179, in collect_thread_text_and_attachments
starter_candidates = list( api.messages.list( roomId = room_id, max = 5, beforeMessage = parent_id ) )
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/api/messages.py", line 138, in list
for item in items:
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 502, in get_items
for json_page in pages:
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 455, in get_pages
response = self.request("GET", url, erc, params=params, **kwargs)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/restsession.py", line 393, in request
check_response_code(response, erc)
File "/app/webex-bot/pyvenv/lib/python3.10/site-packages/webexpythonsdk/utils.py", line 207, in check_response_code
raise ApiError(response)
webexpythonsdk.exceptions.ApiError: [403] Forbidden - Failed to get activity. [Tracking ID: ROUTERGW_48f845fd-b077-4dab-8ec8-57291972479f]
This piece works fine to capture the root message of a 1:1 thread but fails to capture the root message of a thread in a Space.
def collect_thread_text_and_attachments(msg) -> tuple[str, list[str]]:
    """
    Collect the text and attachment contents of the thread ``msg`` belongs to.

    Reported to work in 1:1 conversations but not in spaces, where fetching
    the thread starter fails with 404/403 errors.

    Strategy:
    1) Try api.messages.get(parent_id)
    2) If that fails, scan up to MAX_SCAN recent room messages for the parent
    3) If still not found, try beforeMessage=parent_id as a last resort and
       accept the result only if it really is the parent
    4) Always include replies (list parentId=...) ordered oldest->newest
    5) Ensure the incoming message ``msg`` is present
    6) If the starter can't be found, add a placeholder notice

    Args:
        msg: The incoming message object. Only the attributes ``id``,
            ``parentId``, ``roomId``, ``personId``, ``text`` and ``files``
            are read, all via ``getattr`` with a default.

    Returns:
        ``(thread_text, attachments)`` where ``thread_text`` is the
        newline-joined ``[author]: text`` lines and ``attachments`` is either
        an empty list or a list holding one big string with every attachment
        block. Both strings are truncated to MAX_CHARS characters.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    from itertools import islice

    log = logging.getLogger(__name__)

    MAX_SCAN = 500  # <= number of messages to scan; adjust as needed
    MAX_CHARS = 60_000

    author_cache = {}
    thread_text_lines = []
    attachment_blocks = []

    def process_single_message(m):
        """Append the text line and attachment blocks for one message."""
        author = get_display_name(getattr(m, "personId", "unknown"), author_cache)
        mtext = (getattr(m, "text", "") or "").strip()
        if mtext:
            thread_text_lines.append(f"[{author}]: {mtext}")
        for f_url in getattr(m, "files", None) or []:
            # Label used if the download fails before a real file name is
            # known; without it the except handler would hit an unbound name.
            fname = f_url
            try:
                content, fname, ctype = download_webex_file(f_url)
                extracted = extract_text_from_file(content, fname, ctype)
                attachment_blocks.append(f"[Attachment {fname}]:\n{extracted}")
            except Exception as e:
                # keep going; record the error in attachments so user sees it
                attachment_blocks.append(f"[Attachment error for {fname}]: {e}")

    parent_id = getattr(msg, "parentId", None)
    room_id = getattr(msg, "roomId", None)
    messages_to_process = []

    if parent_id and room_id:
        found_starter = None

        # 1) Try to fetch the starter directly (works in many cases)
        try:
            found_starter = api.messages.get(parent_id)
        except Exception:
            log.debug("messages.get(%s) failed", parent_id, exc_info=True)

            # 2) Scan recent messages in the room for that id (paginated)
            try:
                recent = api.messages.list(roomId=room_id, max=100)
                for m in islice(recent, MAX_SCAN):
                    if getattr(m, "id", None) == parent_id:
                        found_starter = m
                        break
            except Exception:
                # scanning may also fail due to permissions; fall through
                log.debug("room scan for %s failed", parent_id, exc_info=True)

            # 3) Fallback: beforeMessage. The API lists messages sent
            # *before* the given one, so the candidate is normally an
            # unrelated earlier message; accept it only if the id matches.
            # ``max`` is a page size and the SDK generator keeps paginating,
            # so take just the first item instead of draining it.
            if not found_starter:
                try:
                    earlier = api.messages.list(
                        roomId=room_id, max=1, beforeMessage=parent_id
                    )
                    candidate = next(iter(earlier), None)
                    if getattr(candidate, "id", None) == parent_id:
                        found_starter = candidate
                except Exception:
                    log.debug(
                        "beforeMessage lookup for %s failed",
                        parent_id,
                        exc_info=True,
                    )

        if found_starter:
            # The starter goes first
            messages_to_process.append(found_starter)
        else:
            # Placeholder at the top so the LLM sees the lack of context.
            # thread_text_lines is still empty here, so this is line one.
            thread_text_lines.append("[Starter message unavailable — bot may have joined after the thread started or lacks permission to read the original message.]")

        # Collect replies (newest-first), reverse to oldest->newest
        try:
            replies = list(
                api.messages.list(roomId=room_id, parentId=parent_id, max=100)
            )
        except Exception:
            # if replies cannot be fetched, continue - we'll at least include
            # the incoming message
            log.debug("listing replies to %s failed", parent_id, exc_info=True)
            replies = []
        replies.reverse()
        # No pop() here: it used to drop the starter when there were no
        # replies. Duplicates are handled by the seen_ids pass below.
        messages_to_process.extend(replies)

        # Ensure incoming 'msg' is present (sometimes it's not in replies)
        incoming_id = getattr(msg, "id", None)
        if all(getattr(m, "id", None) != incoming_id for m in messages_to_process):
            messages_to_process.append(msg)
    else:
        # Not a thread or missing metadata: just process the single message
        messages_to_process = [msg]

    # Now process messages in order, avoid duplicates
    seen_ids = set()
    for m in messages_to_process:
        mid = getattr(m, "id", None)
        if mid:
            if mid in seen_ids:
                continue
            seen_ids.add(mid)
        process_single_message(m)

    # Combine, guardrail sizes
    thread_text = "\n".join(thread_text_lines)
    if len(thread_text) > MAX_CHARS:
        thread_text = thread_text[:MAX_CHARS] + "\n...[truncated]"
    att_text = "\n\n".join(attachment_blocks)
    if len(att_text) > MAX_CHARS:
        att_text = att_text[:MAX_CHARS] + "\n...[attachments truncated]"
    return thread_text, ([att_text] if att_text else [])
Metadata
Metadata
Assignees
Labels
No labels