From fe8949ca1c2240da8fca9a5cb1d901b83fbe7e33 Mon Sep 17 00:00:00 2001 From: octo-patch Date: Sun, 26 Apr 2026 11:29:26 +0800 Subject: [PATCH] fix: handle string attributes in graph ontology to prevent TypeError crash (fixes #135) When the LLM returns ontology attributes as plain strings instead of dicts, set_ontology() crashes with "TypeError: string indices must be integers, not 'str'" at attr_def["name"]. Two-layer fix: 1. ontology_generator.py: normalize string attrs to {"name", "type", "description"} dicts during validation, so downstream code always receives well-formed structures. 2. graph_builder.py: add isinstance guard as a safety net in set_ontology() for both entity and edge attribute loops. Co-Authored-By: Octopus --- backend/app/services/graph_builder.py | 18 ++++++++++++++---- backend/app/services/ontology_generator.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/backend/app/services/graph_builder.py b/backend/app/services/graph_builder.py index 37c9969c7..a882377f2 100644 --- a/backend/app/services/graph_builder.py +++ b/backend/app/services/graph_builder.py @@ -233,8 +233,13 @@ def safe_attr_name(attr_name: str) -> str: annotations = {} for attr_def in entity_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 - attr_desc = attr_def.get("description", attr_name) + # LLM may return attributes as strings instead of dicts + if isinstance(attr_def, str): + attr_name = safe_attr_name(attr_def) + attr_desc = attr_def + else: + attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 + attr_desc = attr_def.get("description", attr_name) # Zep API 需要 Field 的 description,这是必需的 attrs[attr_name] = Field(description=attr_desc, default=None) annotations[attr_name] = Optional[EntityText] # 类型注解 @@ -257,8 +262,13 @@ def safe_attr_name(attr_name: str) -> str: annotations = {} for attr_def in edge_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 - attr_desc = attr_def.get("description", attr_name) + # LLM may return attributes as strings instead of dicts + if isinstance(attr_def, str): + attr_name = safe_attr_name(attr_def) + attr_desc = attr_def + else: + attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 + attr_desc = attr_def.get("description", attr_name) # Zep API 需要 Field 的 description,这是必需的 attrs[attr_name] = Field(description=attr_desc, default=None) annotations[attr_name] = Optional[str] # 边属性用str类型 diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py index 01a3d799a..e1796b8f7 100644 --- a/backend/app/services/ontology_generator.py +++ b/backend/app/services/ontology_generator.py @@ -298,6 +298,11 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: entity_name_map[original_name] = entity["name"] if "attributes" not in entity: entity["attributes"] = [] + # Normalize attributes: LLM may return strings instead of dicts + entity["attributes"] = [ + attr if isinstance(attr, dict) else {"name": attr, "type": "text", "description": attr} + for attr in entity["attributes"] + ] if "examples" not in entity: entity["examples"] = [] # 确保description不超过100字符 @@ -322,6 +327,11 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: edge["source_targets"] = [] if "attributes" not in edge: edge["attributes"] = [] + # Normalize attributes: LLM may return strings instead of dicts + edge["attributes"] = [ + attr if isinstance(attr, dict) else {"name": attr, "type": "text", "description": attr} + for attr in edge["attributes"] + ] if len(edge.get("description", "")) > 100: edge["description"] = edge["description"][:97] + "..."