Skip to content

Commit 6f971a8

Browse files
blightbow and claude committed
fix(discourse): handle modern Discourse API response shapes
Three Discourse-specific fixes prompted by a 1.0.0 user report:

1. tags shape drift (Discourse PR #36678, merged 2026-02-02 in topic_tags_mixin.rb): the `tags` field globally flipped from list[str] to list[{id, name, slug}] across all topic-bearing serializers. The "expected str instance, dict found" crash from ", ".join(tags) is fixed by extracting `name` per item, and we accept both shapes for older instances.

2. search title was always wrong (latent since day one): SearchPostSerializer has never emitted topic_title — we silently fell back to post.get('name'), which is the user's display name, so search results were headlined with the poster's name instead of the topic title. Look up the title from the parallel topics[] array via topic_id (the topic_map already exists for this).

3. mega-topic detection: topics with >=10000 posts omit post_stream.stream and emit isMegaTopic+lastId instead. We silently surfaced only the first 20 posts. Detect the flag and prepend a clear truncation note so callers know the rest is unavailable via the topic endpoint.

Also drop a dead read of topics[].views in search formatting — SearchTopicListItemSerializer never emits this field, so it was always None and silently hidden by the if-guard.

Tests: rebuild SAMPLE_SEARCH_RESPONSE to mirror Discourse's actual shape (no topic_title on posts, no views on topics, name field is the user display name). Add six regressions: tags as legacy strings, tags as modern dicts, missing tags, mega-topic note, title-from-topics-array, and unknown-topic-id fallback. 850 pass, ty clean.

Bump to 1.0.1.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 250d7d2 commit 6f971a8

4 files changed

Lines changed: 110 additions & 19 deletions

File tree

manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"manifest_version": "0.4",
33
"name": "parkour-mcp",
44
"display_name": "Parkour",
5-
"version": "1.0.0",
5+
"version": "1.0.1",
66
"description": "A content exploration toolkit that helps LLMs surface high signal, unsummarized web content.",
77
"long_description": "MCP server providing a content exploration and research synthesis pipeline. Uses clean first-party APIs to surface and explore web content without summarization. Integrates Kagi, Semantic Scholar, arXiv, deps.dev, IETF, GitHub, MediaWiki, Reddit, Discourse, and DOI resolution APIs into a unified tool suite.\n\n**Note:** The web_fetch_js tool (for JavaScript-rendered pages) requires a one-time browser install. Run `playwright install webkit` from any terminal. The other 11 tools work without this step.",
88
"homepage": "https://github.com/blightbow/parkour-mcp",

parkour_mcp/discourse.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,28 @@ def _format_topic(data: dict, all_posts: list[dict]) -> tuple[str, str]:
325325
f"{views} views",
326326
_format_timestamp(created),
327327
]
328-
tags = data.get("tags") or []
329-
if tags:
330-
meta.append("tags: " + ", ".join(tags))
328+
# Discourse's `tags` field is a list of {id, name, slug} dicts on modern
329+
# instances; older instances returned bare strings. Accept both.
330+
tags_raw = data.get("tags") or []
331+
tag_names = [
332+
t if isinstance(t, str) else t.get("name", "")
333+
for t in tags_raw
334+
]
335+
tag_names = [n for n in tag_names if n]
336+
if tag_names:
337+
meta.append("tags: " + ", ".join(tag_names))
331338
parts.append(" | ".join(meta) + "\n")
332339

340+
# Mega-topic warning: topics with >=10000 posts omit `post_stream.stream`
341+
# entirely and emit `isMegaTopic: true, lastId: <int>` instead. We can't
342+
# batch-fetch the remaining posts, so only the inline ~20 are included.
343+
if data.get("post_stream", {}).get("isMegaTopic"):
344+
parts.append(
345+
f"> **Note:** This is a mega-topic ({posts_count} posts total). "
346+
f"Only the first {len(all_posts)} posts are shown — Discourse "
347+
f"does not expose the full post stream for very large topics.\n"
348+
)
349+
333350
# Posts
334351
for post in all_posts:
335352
post_num = post.get("post_number", 0)
@@ -465,25 +482,22 @@ def _format_search_results(data: dict, base_url: str, limit: int = 10) -> str:
465482
parts: list[str] = []
466483

467484
if posts:
468-
# Build topic_id → topic info map for enrichment
485+
# Build topic_id → topic info map for enrichment. SearchPostSerializer
486+
# does NOT emit topic_title — the title must come from the parallel
487+
# topics[] array.
469488
topic_map = {t["id"]: t for t in topics}
470489

471490
for i, post in enumerate(posts, 1):
472491
topic_id = post.get("topic_id", 0)
473-
topic_title = post.get("topic_title", post.get("name", "Untitled"))
492+
topic_info = topic_map.get(topic_id, {})
493+
topic_title = topic_info.get("title", "Untitled")
474494
username = post.get("username", "unknown")
475495
post_num = post.get("post_number", 1)
476496
blurb = post.get("blurb", "")
477-
478-
topic_info = topic_map.get(topic_id, {})
479497
reply_count = topic_info.get("reply_count", 0)
480-
views = topic_info.get("views")
481498

482499
parts.append(f"{i}. **{topic_title}**")
483-
stats = f"{reply_count} replies"
484-
if views:
485-
stats += f", {views} views"
486-
parts.append(f" @{username} (post #{post_num}) | {stats}")
500+
parts.append(f" @{username} (post #{post_num}) | {reply_count} replies")
487501
parts.append(f" {base_url}/t/{topic_id}/{post_num}")
488502
if blurb:
489503
parts.append(f" {blurb[:200]}")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "parkour-mcp"
3-
version = "1.0.0"
3+
version = "1.0.1"
44
description = "A content exploration toolkit that helps LLMs surface high signal, unsummarized web content."
55
readme = "README.md"
66
license = "MIT"

tests/test_discourse.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ def _make_post(
6060
"reply_count": 1,
6161
"like_count": 5,
6262
"category_id": 7,
63-
"tags": ["test", "meta"],
63+
"tags": [
64+
{"id": 1, "name": "test", "slug": "test"},
65+
{"id": 2, "name": "meta", "slug": "meta"},
66+
],
6467
"created_at": "2026-04-01T10:00:00.000Z",
6568
"slug": "test-topic-title",
6669
"chunk_size": 20,
@@ -71,27 +74,31 @@ def _make_post(
7174
}
7275

7376
SAMPLE_SEARCH_RESPONSE = {
77+
# Mirrors meta.discourse.org's actual /search.json shape: SearchPostSerializer
78+
# does NOT emit topic_title (the post's `name` is the user's display name),
79+
# and SearchTopicListItemSerializer does NOT emit `views`. Title and reply
80+
# counts must come from the parallel topics[] array.
7481
"posts": [
7582
{
7683
"id": 2001,
7784
"topic_id": 100,
78-
"topic_title": "How to install Discourse",
7985
"username": "admin",
86+
"name": "Admin User",
8087
"post_number": 1,
8188
"blurb": "Follow these steps to install Discourse on your server...",
8289
},
8390
{
8491
"id": 2002,
8592
"topic_id": 200,
86-
"topic_title": "Discourse plugin development",
8793
"username": "dev",
94+
"name": "Dev User",
8895
"post_number": 3,
8996
"blurb": "Creating plugins requires understanding the Ember frontend...",
9097
},
9198
],
9299
"topics": [
93-
{"id": 100, "title": "How to install Discourse", "reply_count": 5, "views": 100},
94-
{"id": 200, "title": "Discourse plugin development", "reply_count": 12, "views": 250},
100+
{"id": 100, "title": "How to install Discourse", "reply_count": 5},
101+
{"id": 200, "title": "Discourse plugin development", "reply_count": 12},
95102
],
96103
}
97104

@@ -232,6 +239,51 @@ def test_post_content_cleaned(self):
232239
assert "upload://" not in md
233240
assert "[image]" in md
234241

242+
def test_tags_legacy_string_shape(self):
243+
"""Older Discourse instances return tags as a list of bare strings."""
244+
topic = {**SAMPLE_TOPIC_RESPONSE, "tags": ["alpha", "beta"]}
245+
_, md = _format_topic(topic, SAMPLE_POSTS)
246+
assert "tags: alpha, beta" in md
247+
248+
def test_tags_modern_dict_shape(self):
249+
"""Modern Discourse returns tags as {id, name, slug} dicts."""
250+
topic = {
251+
**SAMPLE_TOPIC_RESPONSE,
252+
"tags": [
253+
{"id": 10, "name": "alpha", "slug": "alpha"},
254+
{"id": 20, "name": "beta", "slug": "beta"},
255+
],
256+
}
257+
_, md = _format_topic(topic, SAMPLE_POSTS)
258+
assert "tags: alpha, beta" in md
259+
260+
def test_tags_missing(self):
261+
topic = {k: v for k, v in SAMPLE_TOPIC_RESPONSE.items() if k != "tags"}
262+
_, md = _format_topic(topic, SAMPLE_POSTS)
263+
assert "tags:" not in md
264+
265+
def test_mega_topic_emits_truncation_note(self):
266+
"""Topics with >=10k posts omit post_stream.stream and set
267+
isMegaTopic: true. We can only surface the inline posts and must
268+
clearly tell the caller the rest is unavailable."""
269+
topic = {
270+
**SAMPLE_TOPIC_RESPONSE,
271+
"posts_count": 12345,
272+
"post_stream": {
273+
"isMegaTopic": True,
274+
"lastId": 99999,
275+
"posts": SAMPLE_POSTS,
276+
},
277+
}
278+
_, md = _format_topic(topic, SAMPLE_POSTS)
279+
assert "mega-topic" in md
280+
assert "12345 posts total" in md
281+
assert f"first {len(SAMPLE_POSTS)} posts" in md
282+
283+
def test_normal_topic_no_mega_note(self):
284+
_, md = _format_topic(SAMPLE_TOPIC_RESPONSE, SAMPLE_POSTS)
285+
assert "mega-topic" not in md
286+
235287

236288
# ---------------------------------------------------------------------------
237289
# _split_by_posts
@@ -315,6 +367,31 @@ def test_limit(self):
315367
assert "How to install Discourse" in result
316368
assert "Discourse plugin development" not in result
317369

370+
def test_title_comes_from_topics_array(self):
371+
"""Regression: SearchPostSerializer never emits topic_title.
372+
373+
Earlier code fell back to post.get('name'), which is the user's
374+
display name (e.g. 'Sam Saffron'), not the topic title. The title
375+
must be looked up from the parallel topics[] array via topic_id.
376+
"""
377+
result = _format_search_results(SAMPLE_SEARCH_RESPONSE, BASE_URL)
378+
# Real titles from topics[] are present
379+
assert "How to install Discourse" in result
380+
assert "Discourse plugin development" in result
381+
# User display names from posts[].name must NOT leak into headlines
382+
assert "**Admin User**" not in result
383+
assert "**Dev User**" not in result
384+
385+
def test_unknown_topic_id_falls_back_to_untitled(self):
386+
"""If a search post references a topic_id missing from topics[],
387+
the title falls back to 'Untitled' rather than crashing."""
388+
data = {
389+
"posts": [{"id": 1, "topic_id": 999, "username": "u", "post_number": 1, "blurb": ""}],
390+
"topics": [],
391+
}
392+
result = _format_search_results(data, BASE_URL)
393+
assert "Untitled" in result
394+
318395

319396
# ---------------------------------------------------------------------------
320397
# _format_latest

0 commit comments

Comments
 (0)