diff --git a/docs/docs/examples/cookbooks/GraphRAG_v1.ipynb b/docs/docs/examples/cookbooks/GraphRAG_v1.ipynb index 2c04ae29b7640..83de929561982 100644 --- a/docs/docs/examples/cookbooks/GraphRAG_v1.ipynb +++ b/docs/docs/examples/cookbooks/GraphRAG_v1.ipynb @@ -622,7 +622,7 @@ "\n", " metadata = node.metadata.copy()\n", " for triple in entities_relationship:\n", - " subj, rel, obj, description = triple\n", + " subj, obj, rel, description = triple\n", " subj_node = EntityNode(name=subj, properties=metadata)\n", " obj_node = EntityNode(name=obj, properties=metadata)\n", " metadata[\"relationship_description\"] = description\n", @@ -996,7 +996,6 @@ "- entity_name: Name of the entity, capitalized\n", "- entity_type: Type of the entity\n", "- entity_description: Comprehensive description of the entity's attributes and activities\n", - "Format each entity as (\"entity\"$$$$\"\"$$$$\"\"$$$$\"\")\n", "\n", "2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.\n", "For each pair of related entities, extract the following information:\n", @@ -1005,9 +1004,45 @@ "- relation: relationship between source_entity and target_entity\n", "- relationship_description: explanation as to why you think the source entity and the target entity are related to each other\n", "\n", - "Format each relationship as (\"relationship\"$$$$\"\"$$$$\"\"$$$$\"\"$$$$\"\")\n", - "\n", - "3. When finished, output.\n", + "3. 
Output Formatting:\n", + "- Return the result in valid JSON format with two keys: 'entities' (list of entity objects) and 'relationships' (list of relationship objects).\n", + "- Exclude any text outside the JSON structure (e.g., no explanations or comments).\n", + "- If no entities or relationships are identified, return empty lists: { \"entities\": [], \"relationships\": [] }.\n", + "\n", + "-An Output Example-\n", + "{\n", + " \"entities\": [\n", + " {\n", + " \"entity_name\": \"Albert Einstein\",\n", + " \"entity_type\": \"Person\",\n", + " \"entity_description\": \"Albert Einstein was a theoretical physicist who developed the theory of relativity and made significant contributions to physics.\"\n", + " },\n", + " {\n", + " \"entity_name\": \"Theory of Relativity\",\n", + " \"entity_type\": \"Scientific Theory\",\n", + " \"entity_description\": \"A scientific theory developed by Albert Einstein, describing the laws of physics in relation to observers in different frames of reference.\"\n", + " },\n", + " {\n", + " \"entity_name\": \"Nobel Prize in Physics\",\n", + " \"entity_type\": \"Award\",\n", + " \"entity_description\": \"A prestigious international award in the field of physics, awarded annually by the Royal Swedish Academy of Sciences.\"\n", + " }\n", + " ],\n", + " \"relationships\": [\n", + " {\n", + " \"source_entity\": \"Albert Einstein\",\n", + " \"target_entity\": \"Theory of Relativity\",\n", + " \"relation\": \"developed\",\n", + " \"relationship_description\": \"Albert Einstein is the developer of the theory of relativity.\"\n", + " },\n", + " {\n", + " \"source_entity\": \"Albert Einstein\",\n", + " \"target_entity\": \"Nobel Prize in Physics\",\n", + " \"relation\": \"won\",\n", + " \"relationship_description\": \"Albert Einstein won the Nobel Prize in Physics in 1921.\"\n", + " }\n", + " ]\n", + "}\n", "\n", "-Real Data-\n", "######################\n", @@ -1022,14 +1057,40 @@ "metadata": {}, "outputs": [], "source": [ - "entity_pattern = 
r'\\(\"entity\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\)'\n", - "relationship_pattern = r'\\(\"relationship\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\)'\n", + "import json\n", "\n", "\n", "def parse_fn(response_str: str) -> Any:\n", - " entities = re.findall(entity_pattern, response_str)\n", - " relationships = re.findall(relationship_pattern, response_str)\n", - " return entities, relationships\n", + " json_pattern = r\"\\{.*\\}\"\n", + " match = re.search(json_pattern, response_str, re.DOTALL)\n", + " entities = []\n", + " relationships = []\n", + " if not match:\n", + " return entities, relationships\n", + " json_str = match.group(0)\n", + " try:\n", + " data = json.loads(json_str)\n", + " entities = [\n", + " (\n", + " entity[\"entity_name\"],\n", + " entity[\"entity_type\"],\n", + " entity[\"entity_description\"],\n", + " )\n", + " for entity in data.get(\"entities\", [])\n", + " ]\n", + " relationships = [\n", + " (\n", + " relation[\"source_entity\"],\n", + " relation[\"target_entity\"],\n", + " relation[\"relation\"],\n", + " relation[\"relationship_description\"],\n", + " )\n", + " for relation in data.get(\"relationships\", [])\n", + " ]\n", + " return entities, relationships\n", + " except (json.JSONDecodeError, KeyError, TypeError) as e:  # missing/invalid fields\n", + " print(\"Error parsing JSON:\", e)\n", + " return entities, relationships\n", "\n", "\n", "kg_extractor = GraphRAGExtractor(\n", diff --git a/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb b/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb index 7e1f2e71809cb..8a3eb7622fd0e 100644 --- a/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb +++ b/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb @@ -778,7 +778,6 @@ "- entity_name: Name of the entity, capitalized\n", "- entity_type: Type of the entity\n", "- entity_description: Comprehensive description of the entity's attributes and activities\n", - "Format each entity as (\"entity\"$$$$\"\"$$$$\"\"$$$$\"\")\n", "\n", "2. 
From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.\n", "For each pair of related entities, extract the following information:\n", @@ -787,9 +786,45 @@ "- relation: relationship between source_entity and target_entity\n", "- relationship_description: explanation as to why you think the source entity and the target entity are related to each other\n", "\n", - "Format each relationship as (\"relationship\"$$$$\"\"$$$$\"\"$$$$\"\"$$$$\"\")\n", - "\n", - "3. When finished, output.\n", + "3. Output Formatting:\n", + "- Return the result in valid JSON format with two keys: 'entities' (list of entity objects) and 'relationships' (list of relationship objects).\n", + "- Exclude any text outside the JSON structure (e.g., no explanations or comments).\n", + "- If no entities or relationships are identified, return empty lists: { \"entities\": [], \"relationships\": [] }.\n", + "\n", + "-An Output Example-\n", + "{\n", + " \"entities\": [\n", + " {\n", + " \"entity_name\": \"Albert Einstein\",\n", + " \"entity_type\": \"Person\",\n", + " \"entity_description\": \"Albert Einstein was a theoretical physicist who developed the theory of relativity and made significant contributions to physics.\"\n", + " },\n", + " {\n", + " \"entity_name\": \"Theory of Relativity\",\n", + " \"entity_type\": \"Scientific Theory\",\n", + " \"entity_description\": \"A scientific theory developed by Albert Einstein, describing the laws of physics in relation to observers in different frames of reference.\"\n", + " },\n", + " {\n", + " \"entity_name\": \"Nobel Prize in Physics\",\n", + " \"entity_type\": \"Award\",\n", + " \"entity_description\": \"A prestigious international award in the field of physics, awarded annually by the Royal Swedish Academy of Sciences.\"\n", + " }\n", + " ],\n", + " \"relationships\": [\n", + " {\n", + " \"source_entity\": \"Albert Einstein\",\n", + " \"target_entity\": \"Theory of 
Relativity\",\n", + " \"relation\": \"developed\",\n", + " \"relationship_description\": \"Albert Einstein is the developer of the theory of relativity.\"\n", + " },\n", + " {\n", + " \"source_entity\": \"Albert Einstein\",\n", + " \"target_entity\": \"Nobel Prize in Physics\",\n", + " \"relation\": \"won\",\n", + " \"relationship_description\": \"Albert Einstein won the Nobel Prize in Physics in 1921.\"\n", + " }\n", + " ]\n", + "}\n", "\n", "-Real Data-\n", "######################\n", @@ -804,14 +839,40 @@ "metadata": {}, "outputs": [], "source": [ - "entity_pattern = r'\\(\"entity\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\)'\n", - "relationship_pattern = r'\\(\"relationship\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\$\\$\\$\\$\"(.+?)\"\\)'\n", + "import json\n", "\n", "\n", "def parse_fn(response_str: str) -> Any:\n", - " entities = re.findall(entity_pattern, response_str)\n", - " relationships = re.findall(relationship_pattern, response_str)\n", - " return entities, relationships\n", + " json_pattern = r\"\\{.*\\}\"\n", + " match = re.search(json_pattern, response_str, re.DOTALL)\n", + " entities = []\n", + " relationships = []\n", + " if not match:\n", + " return entities, relationships\n", + " json_str = match.group(0)\n", + " try:\n", + " data = json.loads(json_str)\n", + " entities = [\n", + " (\n", + " entity[\"entity_name\"],\n", + " entity[\"entity_type\"],\n", + " entity[\"entity_description\"],\n", + " )\n", + " for entity in data.get(\"entities\", [])\n", + " ]\n", + " relationships = [\n", + " (\n", + " relation[\"source_entity\"],\n", + " relation[\"target_entity\"],\n", + " relation[\"relation\"],\n", + " relation[\"relationship_description\"],\n", + " )\n", + " for relation in data.get(\"relationships\", [])\n", + " ]\n", + " return entities, relationships\n", + " except (json.JSONDecodeError, KeyError, TypeError) as e:  # missing/invalid fields\n", + " print(\"Error parsing JSON:\", e)\n", + " return entities, relationships\n", "\n", "\n", 
"kg_extractor = GraphRAGExtractor(\n",