diff --git a/backend/app/services/report_agent.py b/backend/app/services/report_agent.py
index cecd70b464..f65e7457f9 100644
--- a/backend/app/services/report_agent.py
+++ b/backend/app/services/report_agent.py
@@ -1064,6 +1064,42 @@ def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte
     # 合法的工具名称集合，用于裸 JSON 兜底解析时校验
     VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"}
 
+    def _sanitize_section_content(self, content: str, section_title: str = "") -> str:
+        """
+        Clean LLM output before it is stored as report section content.
+
+        Strips <tool_call>...</tool_call> and [TOOL_CALL]...) markup. If the rest is
+        empty, or is only a JSON tool call for a VALID_TOOL_NAMES tool (bare or inside
+        a Markdown code fence), a placeholder note is returned instead; otherwise the
+        stripped text is returned. Falsy input is returned unchanged. Ref: MiroFish#599.
+        """
+        if not content:
+            return content
+        cleaned = re.sub(r'<tool_call>.*?</tool_call>', '', content, flags=re.DOTALL)
+        cleaned = re.sub(r'\[TOOL_CALL\].*?\)', '', cleaned).strip()
+        if not cleaned:
+            return f"_(Keine Inhalte verfügbar für: {section_title})_" if section_title else "_(Keine Inhalte)_"
+        # Models often wrap the leaked call in a ``` fence; unwrap it for detection only.
+        fenced = re.fullmatch(r'```[A-Za-z]*\s*(.*?)\s*```', cleaned, flags=re.DOTALL)
+        candidate = fenced.group(1) if fenced else cleaned
+        try:
+            parsed = json.loads(candidate)
+        except json.JSONDecodeError:
+            return cleaned
+        tool_name = (parsed.get("name") or parsed.get("tool")) if isinstance(parsed, dict) else None
+        # The str check keeps an unhashable "name" value from raising in the set lookup.
+        if not isinstance(tool_name, str) or tool_name not in self.VALID_TOOL_NAMES:
+            return cleaned
+        logger.warning(
+            "Section '%s' content is raw tool_call (tool=%s) — replaced with fallback",
+            section_title, tool_name
+        )
+        return (
+            f"_(Hinweis: Für diesen Abschnitt konnte das Tool `{tool_name}` "
+            f"innerhalb der Iterations-Limits nicht ausgeführt werden. "
+            f"Bitte Report neu generieren oder Modell-Konfiguration prüfen.)_"
+        )
+
     def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]:
         """
         从LLM响应中解析工具调用
@@ -1390,6 +1426,7 @@ def _generate_section_react(
 
                 # 正常结束
                 final_answer = response.split("Final Answer:")[-1].strip()
+                final_answer = self._sanitize_section_content(final_answer, section.title)
                 logger.info(t('report.sectionGenDone', title=section.title, count=tool_calls_count))
 
                 if self.report_logger:
@@ -1488,7 +1525,7 @@ def _generate_section_react(
             # 工具调用已足够，LLM 输出了内容但没带 "Final Answer:" 前缀
             # 直接将这段内容作为最终答案，不再空转
             logger.info(t('report.sectionNoPrefix', title=section.title, count=tool_calls_count))
-            final_answer = response.strip()
+            final_answer = self._sanitize_section_content(response.strip(), section.title)
 
             if self.report_logger:
                 self.report_logger.log_section_content(
@@ -1517,6 +1554,8 @@ def _generate_section_react(
             final_answer = response.split("Final Answer:")[-1].strip()
         else:
             final_answer = response
+        # Sanitize: never write raw tool_call JSON as section content (bug #599)
+        final_answer = self._sanitize_section_content(final_answer, section.title)
         
         # 记录章节内容生成完成日志
         if self.report_logger:
@@ -1809,16 +1848,33 @@ def chat(
 
         # 构建消息
         messages = [{"role": "system", "content": system_prompt}]
-        
-        # 添加历史对话
-        for h in chat_history[-10:]:  # 限制历史长度
-            messages.append(h)
-        
+
+        # 添加历史对话 (defensive): keep only {role, content}; drop copies of the current question.
+        # Fix for github.com/666ghj/MiroFish#577 (chat repeats its first answer):
+        # if the frontend accidentally includes the current user message in chat_history,
+        # the LLM sees the question as already asked and repeats the old answer.
+        for h in chat_history[-10:]:
+            if not isinstance(h, dict):
+                continue
+            role = h.get("role")
+            content = h.get("content")
+            if role not in ("user", "assistant") or not content:
+                continue
+            # Skip falls dies bereits die aktuelle User-Frage ist
+            if role == "user" and content.strip() == message.strip():
+                continue
+            messages.append({"role": role, "content": content})
+
         # 添加用户消息
         messages.append({
-            "role": "user", 
+            "role": "user",
             "content": message
         })
+
+        logger.debug(
+            "report_agent.chat: total_messages=%d history_len=%d current_msg_len=%d",
+            len(messages), len(chat_history), len(message)
+        )
         
         # ReACT循环（简化版）
         tool_calls_made = []
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f49b..570813e15b 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -5,11 +5,120 @@
 
 import json
 import re
+import logging
 from typing import Optional, Dict, Any, List
 from openai import OpenAI
 
 from ..config import Config
 
+logger = logging.getLogger(__name__)
+
+
+def _parse_llm_json(response: str) -> Dict[str, Any]:
+    """
+    Parse JSON out of an LLM reply, tolerating common formatting noise.
+
+    Models (notably qwen, gemma and ollama-served ones) often append text after
+    the JSON even with response_format=json_object, wrap it in ```json fences,
+    or put raw control characters inside string values.
+
+    Strategies, tried in order:
+    1. Strip a leading/trailing Markdown fence (language tag is case-insensitive).
+    2. json.loads on the whole text (fast path; the value is returned as parsed).
+    3. raw_decode from position 0, ignoring trailing text (a list becomes {"items": [...]}).
+    4. raw_decode from the first "{", ignoring text before and after the object.
+    5. Remove non-whitespace control characters, then repeat 2 and 4 with
+       strict=False so raw newlines/tabs inside strings are accepted too.
+
+    Only the first "{" is ever used as a start: decoding from a later brace
+    could silently return a nested fragment of a truncated outer object.
+
+    Args:
+        response: Raw text returned by the model.
+
+    Returns:
+        The decoded JSON value. Strategies 3 and 4 only return dicts; the
+        whole-document parses return whatever the document contains.
+
+    Raises:
+        ValueError: If the response is empty or no strategy yields JSON.
+
+    Fixes:
+    - github.com/666ghj/MiroFish#624 ("Unexpected non-whitespace character after JSON at position N")
+    - github.com/666ghj/MiroFish#622 (duplicate)
+    - github.com/666ghj/MiroFish#601 (500 error on ontology/generate with qwen-plus/ollama)
+    """
+    if not response or not response.strip():
+        raise ValueError("LLM lieferte leere Antwort")
+
+    # 1. Strip Markdown fences (case-insensitive tag, as in the code this replaced)
+    cleaned = response.strip()
+    cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
+    cleaned = re.sub(r'\n?```\s*$', '', cleaned).strip()
+
+    # 2. Fast path: the whole text is valid JSON
+    try:
+        return json.loads(cleaned)
+    except json.JSONDecodeError as e_strict:
+        first_error = e_strict  # reported if every later strategy fails too
+
+    # 3. raw_decode parses a JSON prefix and reports where it ended
+    try:
+        obj, end_idx = json.JSONDecoder().raw_decode(cleaned)
+    except json.JSONDecodeError:
+        pass
+    else:
+        trailing = cleaned[end_idx:].strip()
+        if trailing:
+            logger.warning(
+                "LLM appended trailing text after JSON (%d chars), ignored. Preview: %s",
+                len(trailing), trailing[:120]
+            )
+        if isinstance(obj, dict):
+            return obj
+        if isinstance(obj, list):
+            # Wrap so chat_json callers get a dict from this path
+            return {"items": obj}
+
+    # 4. Object embedded in surrounding prose
+    embedded = _decode_first_object(cleaned, json.JSONDecoder())
+    if embedded is not None:
+        return embedded
+
+    # 5. Last resort: drop stray control characters and parse leniently
+    sanitized = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', cleaned)
+    try:
+        return json.loads(sanitized, strict=False)
+    except json.JSONDecodeError:
+        pass
+    embedded = _decode_first_object(sanitized, json.JSONDecoder(strict=False))
+    if embedded is not None:
+        return embedded
+
+    # Every strategy failed: raise with enough context to debug the model output
+    snippet = cleaned[:200] + ('...' if len(cleaned) > 200 else '')
+    raise ValueError(
+        f"LLM返回的JSON格式无效 (alle Parse-Strategien fehlgeschlagen): "
+        f"first_error={first_error.msg} at pos {first_error.pos}. "
+        f"Response-Preview: {snippet}"
+    )
+
+
+def _decode_first_object(text: str, decoder: json.JSONDecoder) -> Optional[Dict[str, Any]]:
+    """Decode the JSON object starting at the first "{" in text; None if absent or invalid."""
+    start = text.find('{')
+    if start < 0:
+        return None
+    try:
+        obj, end_idx = decoder.raw_decode(text, start)
+    except json.JSONDecodeError:
+        return None
+    logger.warning(
+        "Extracted JSON from messy LLM output (%d chars before, %d after)",
+        start, len(text) - end_idx
+    )
+    return obj
+
 
 class LLMClient:
     """LLM客户端"""
@@ -90,14 +199,5 @@ def chat_json(
             max_tokens=max_tokens,
             response_format={"type": "json_object"}
         )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
-
-        try:
-            return json.loads(cleaned_response)
-        except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
+        return _parse_llm_json(response)