From e98abdf96ff681341d271bfc056d741d71d4a054 Mon Sep 17 00:00:00 2001
From: motaz m alharbi
Date: Sun, 14 Dec 2025 23:26:46 +0200
Subject: [PATCH 1/2] Implement comprehensive tutorial generation framework with LLM integration

- Added `nodes.py` to define a series of nodes for fetching repository files, identifying abstractions, analyzing relationships, ordering chapters, and writing tutorial chapters.
- Introduced `FetchRepo` node to crawl GitHub repositories or local directories for source files.
- Created `IdentifyAbstractions` node to leverage an LLM for extracting key abstractions from the codebase.
- Developed `AnalyzeRelationships` node to establish connections between identified abstractions.
- Implemented `OrderChapters` node to determine the optimal sequence for tutorial chapters based on abstraction relationships.
- Added `WriteChapters` node to generate Markdown content for each chapter using an LLM.
- Created `CombineTutorial` node to compile the generated chapters and create an index file with a Mermaid diagram for visual representation of relationships.
- Established logging and caching.
---
 nodes.py                 |  48 +-
 nodes.py.backup          | 880 ++++++++++++++++++++++++++++++++++++++
 nodes.py.backup2         | 919 +++++++++++++++++++++++++++++++++++++++
 utils/call_llm.py        |  77 ++++
 utils/call_llm.py.backup | 185 ++++++++
 5 files changed, 2106 insertions(+), 3 deletions(-)
 create mode 100644 nodes.py.backup
 create mode 100644 nodes.py.backup2
 create mode 100644 utils/call_llm.py.backup

diff --git a/nodes.py b/nodes.py
index 0e3fa587..8f6692b7 100644
--- a/nodes.py
+++ b/nodes.py
@@ -113,6 +113,7 @@ def create_llm_context(files_data):
             language,
             use_cache,
             max_abstraction_num,
+            files_data,
         ) # Return all parameters

     def exec(self, prep_res):
@@ -124,6 +125,7 @@ def exec(self, prep_res):
             language,
             use_cache,
             max_abstraction_num,
+            files_data,
         ) = prep_res # Unpack all parameters
         print(f"Identifying abstractions using LLM...")

@@ -173,7 +175,25 @@ def exec(self, prep_res):
     - 5 # path/to/another.js
 # ...
up to {max_abstraction_num} abstractions ```""" - response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + # Build context from most important files (first 5 files by size/relevance) + context_files = sorted( + [(path, content) for path, content in files_data], + key=lambda x: len(x[1]), + reverse=True + )[:5] + + rag_context = "\n\n".join([ + f"=== {path} ===\n{content[:2000]}" # First 2000 chars per file + for path, content in context_files + ]) + + from utils.call_llm import call_llm_with_context + response = call_llm_with_context( + prompt=prompt, + context=rag_context, + use_cache=(use_cache and self.cur_retry == 0), + include_remote_rag=True + ) # --- Validation --- yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() @@ -344,7 +364,18 @@ def exec(self, prep_res): Now, provide the YAML output: """ - response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + # Use already gathered context from prep (relevant_files_content_map has the files) + # But we can add a focused snippet for relationship analysis + from utils.call_llm import call_llm_with_context + + # Context is already built in prep via file_context_str + # We'll pass it as additional context + response = call_llm_with_context( + prompt=prompt, + context="", # Context already in prompt, no need to duplicate + use_cache=(use_cache and self.cur_retry == 0), + include_remote_rag=True + ) # --- Validation --- yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() @@ -723,7 +754,18 @@ def exec(self, item): Now, directly provide a super beginner-friendly Markdown output (DON'T need ```markdown``` tags): """ - chapter_content = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + from utils.call_llm import call_llm_with_context + + # Context from related files is already in file_context_str + # Pass it explicitly as RAG context for better separation + response = call_llm_with_context( + prompt=prompt, + context=file_context_str if file_context_str else "", + use_cache=(use_cache and self.cur_retry == 0), + include_remote_rag=True + ) + + chapter_content = response # Basic validation/cleanup actual_heading = f"# Chapter {chapter_num}: {abstraction_name}" # Use potentially translated name if not chapter_content.strip().startswith(f"# Chapter {chapter_num}"): diff --git a/nodes.py.backup b/nodes.py.backup new file mode 100644 index 00000000..0e3fa587 --- /dev/null +++ b/nodes.py.backup @@ -0,0 +1,880 @@ +import os +import re +import yaml +from pocketflow import Node, BatchNode +from utils.crawl_github_files import crawl_github_files +from utils.call_llm import call_llm +from utils.crawl_local_files import crawl_local_files + + +# Helper to get content for specific file indices +def get_content_for_indices(files_data, indices): + content_map = {} + for i in indices: + if 0 <= i < len(files_data): + path, content = files_data[i] + content_map[f"{i} # {path}"] = ( + content # Use index + path as key for context + ) + return content_map + + +class FetchRepo(Node): + def prep(self, shared): + repo_url = shared.get("repo_url") + local_dir = shared.get("local_dir") + project_name = shared.get("project_name") + + if not project_name: + # Basic name derivation from URL or directory + if repo_url: + project_name = repo_url.split("/")[-1].replace(".git", "") + else: + project_name = os.path.basename(os.path.abspath(local_dir)) + 
shared["project_name"] = project_name + + # Get file patterns directly from shared + include_patterns = shared["include_patterns"] + exclude_patterns = shared["exclude_patterns"] + max_file_size = shared["max_file_size"] + + return { + "repo_url": repo_url, + "local_dir": local_dir, + "token": shared.get("github_token"), + "include_patterns": include_patterns, + "exclude_patterns": exclude_patterns, + "max_file_size": max_file_size, + "use_relative_paths": True, + } + + def exec(self, prep_res): + if prep_res["repo_url"]: + print(f"Crawling repository: {prep_res['repo_url']}...") + result = crawl_github_files( + repo_url=prep_res["repo_url"], + token=prep_res["token"], + include_patterns=prep_res["include_patterns"], + exclude_patterns=prep_res["exclude_patterns"], + max_file_size=prep_res["max_file_size"], + use_relative_paths=prep_res["use_relative_paths"], + ) + else: + print(f"Crawling directory: {prep_res['local_dir']}...") + + result = crawl_local_files( + directory=prep_res["local_dir"], + include_patterns=prep_res["include_patterns"], + exclude_patterns=prep_res["exclude_patterns"], + max_file_size=prep_res["max_file_size"], + use_relative_paths=prep_res["use_relative_paths"] + ) + + # Convert dict to list of tuples: [(path, content), ...] + files_list = list(result.get("files", {}).items()) + if len(files_list) == 0: + raise (ValueError("Failed to fetch files")) + print(f"Fetched {len(files_list)} files.") + return files_list + + def post(self, shared, prep_res, exec_res): + shared["files"] = exec_res # List of (path, content) tuples + + +class IdentifyAbstractions(Node): + def prep(self, shared): + files_data = shared["files"] + project_name = shared["project_name"] # Get project name + language = shared.get("language", "english") # Get language + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + max_abstraction_num = shared.get("max_abstraction_num", 10) # Get max_abstraction_num, default to 10 + + # Helper to create context from files, respecting limits (basic example) + def create_llm_context(files_data): + context = "" + file_info = [] # Store tuples of (index, path) + for i, (path, content) in enumerate(files_data): + entry = f"--- File Index {i}: {path} ---\n{content}\n\n" + context += entry + file_info.append((i, path)) + + return context, file_info # file_info is list of (index, path) + + context, file_info = create_llm_context(files_data) + # Format file info for the prompt (comment is just a hint for LLM) + file_listing_for_prompt = "\n".join( + [f"- {idx} # {path}" for idx, path in file_info] + ) + return ( + context, + file_listing_for_prompt, + len(files_data), + project_name, + language, + use_cache, + max_abstraction_num, + ) # Return all parameters + + def exec(self, prep_res): + ( + context, + file_listing_for_prompt, + file_count, + project_name, + language, + use_cache, + max_abstraction_num, + ) = prep_res # Unpack all parameters + print(f"Identifying abstractions using LLM...") + + # Add language instruction and hints only if not English + language_instruction = "" + name_lang_hint = "" + desc_lang_hint = "" + if language.lower() != "english": + language_instruction = f"IMPORTANT: Generate the `name` and `description` for each abstraction in **{language.capitalize()}** language. 
Do NOT use English for these fields.\n\n" + # Keep specific hints here as name/description are primary targets + name_lang_hint = f" (value in {language.capitalize()})" + desc_lang_hint = f" (value in {language.capitalize()})" + + prompt = f""" +For the project `{project_name}`: + +Codebase Context: +{context} + +{language_instruction}Analyze the codebase context. +Identify the top 5-{max_abstraction_num} core most important abstractions to help those new to the codebase. + +For each abstraction, provide: +1. A concise `name`{name_lang_hint}. +2. A beginner-friendly `description` explaining what it is with a simple analogy, in around 100 words{desc_lang_hint}. +3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`. + +List of file indices and paths present in the context: +{file_listing_for_prompt} + +Format the output as a YAML list of dictionaries: + +```yaml +- name: | + Query Processing{name_lang_hint} + description: | + Explains what the abstraction does. + It's like a central dispatcher routing requests.{desc_lang_hint} + file_indices: + - 0 # path/to/file1.py + - 3 # path/to/related.py +- name: | + Query Optimization{name_lang_hint} + description: | + Another core concept, similar to a blueprint for objects.{desc_lang_hint} + file_indices: + - 5 # path/to/another.js +# ... up to {max_abstraction_num} abstractions +```""" + response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + + # --- Validation --- + yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() + abstractions = yaml.safe_load(yaml_str) + + if not isinstance(abstractions, list): + raise ValueError("LLM Output is not a list") + + validated_abstractions = [] + for item in abstractions: + if not isinstance(item, dict) or not all( + k in item for k in ["name", "description", "file_indices"] + ): + raise ValueError(f"Missing keys in abstraction item: {item}") + if not isinstance(item["name"], str): + raise ValueError(f"Name is not a string in item: {item}") + if not isinstance(item["description"], str): + raise ValueError(f"Description is not a string in item: {item}") + if not isinstance(item["file_indices"], list): + raise ValueError(f"file_indices is not a list in item: {item}") + + # Validate indices + validated_indices = [] + for idx_entry in item["file_indices"]: + try: + if isinstance(idx_entry, int): + idx = idx_entry + elif isinstance(idx_entry, str) and "#" in idx_entry: + idx = int(idx_entry.split("#")[0].strip()) + else: + idx = int(str(idx_entry).strip()) + + if not (0 <= idx < file_count): + raise ValueError( + f"Invalid file index {idx} found in item {item['name']}. Max index is {file_count - 1}." 
+ ) + validated_indices.append(idx) + except (ValueError, TypeError): + raise ValueError( + f"Could not parse index from entry: {idx_entry} in item {item['name']}" + ) + + item["files"] = sorted(list(set(validated_indices))) + # Store only the required fields + validated_abstractions.append( + { + "name": item["name"], # Potentially translated name + "description": item[ + "description" + ], # Potentially translated description + "files": item["files"], + } + ) + + print(f"Identified {len(validated_abstractions)} abstractions.") + return validated_abstractions + + def post(self, shared, prep_res, exec_res): + shared["abstractions"] = ( + exec_res # List of {"name": str, "description": str, "files": [int]} + ) + + +class AnalyzeRelationships(Node): + def prep(self, shared): + abstractions = shared[ + "abstractions" + ] # Now contains 'files' list of indices, name/description potentially translated + files_data = shared["files"] + project_name = shared["project_name"] # Get project name + language = shared.get("language", "english") # Get language + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + + # Get the actual number of abstractions directly + num_abstractions = len(abstractions) + + # Create context with abstraction names, indices, descriptions, and relevant file snippets + context = "Identified Abstractions:\\n" + all_relevant_indices = set() + abstraction_info_for_prompt = [] + for i, abstr in enumerate(abstractions): + # Use 'files' which contains indices directly + file_indices_str = ", ".join(map(str, abstr["files"])) + # Abstraction name and description might be translated already + info_line = f"- Index {i}: {abstr['name']} (Relevant file indices: [{file_indices_str}])\\n Description: {abstr['description']}" + context += info_line + "\\n" + abstraction_info_for_prompt.append( + f"{i} # {abstr['name']}" + ) # Use potentially translated name here too + all_relevant_indices.update(abstr["files"]) + + context += "\\nRelevant File Snippets (Referenced by Index and Path):\\n" + # Get content for relevant files using helper + relevant_files_content_map = get_content_for_indices( + files_data, sorted(list(all_relevant_indices)) + ) + # Format file content for context + file_context_str = "\\n\\n".join( + f"--- File: {idx_path} ---\\n{content}" + for idx_path, content in relevant_files_content_map.items() + ) + context += file_context_str + + return ( + context, + "\n".join(abstraction_info_for_prompt), + num_abstractions, # Pass the actual count + project_name, + language, + use_cache, + ) # Return use_cache + + def exec(self, prep_res): + ( + context, + abstraction_listing, + num_abstractions, # Receive the actual count + project_name, + language, + use_cache, + ) = prep_res # Unpack use_cache + print(f"Analyzing relationships using LLM...") + + # Add language instruction and hints only if not English + language_instruction = "" + lang_hint = "" + list_lang_note = "" + if language.lower() != "english": + language_instruction = f"IMPORTANT: Generate the `summary` and relationship `label` fields in **{language.capitalize()}** language. 
Do NOT use English for these fields.\n\n" + lang_hint = f" (in {language.capitalize()})" + list_lang_note = f" (Names might be in {language.capitalize()})" # Note for the input list + + prompt = f""" +Based on the following abstractions and relevant code snippets from the project `{project_name}`: + +List of Abstraction Indices and Names{list_lang_note}: +{abstraction_listing} + +Context (Abstractions, Descriptions, Code): +{context} + +{language_instruction}Please provide: +1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences{lang_hint}. Use markdown formatting with **bold** and *italic* text to highlight important concepts. +2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify: + - `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`) + - `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`) + - `label`: A brief label for the interaction **in just a few words**{lang_hint} (e.g., "Manages", "Inherits", "Uses"). + Ideally the relationship should be backed by one abstraction calling or passing parameters to another. + Simplify the relationship and exclude those non-important ones. + +IMPORTANT: Make sure EVERY abstraction is involved in at least ONE relationship (either as source or target). Each abstraction index must appear at least once across all relationships. + +Format the output as YAML: + +```yaml +summary: | + A brief, simple explanation of the project{lang_hint}. + Can span multiple lines with **bold** and *italic* for emphasis. +relationships: + - from_abstraction: 0 # AbstractionName1 + to_abstraction: 1 # AbstractionName2 + label: "Manages"{lang_hint} + - from_abstraction: 2 # AbstractionName3 + to_abstraction: 0 # AbstractionName1 + label: "Provides config"{lang_hint} + # ... other relationships +``` + +Now, provide the YAML output: +""" + response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + + # --- Validation --- + yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() + relationships_data = yaml.safe_load(yaml_str) + + if not isinstance(relationships_data, dict) or not all( + k in relationships_data for k in ["summary", "relationships"] + ): + raise ValueError( + "LLM output is not a dict or missing keys ('summary', 'relationships')" + ) + if not isinstance(relationships_data["summary"], str): + raise ValueError("summary is not a string") + if not isinstance(relationships_data["relationships"], list): + raise ValueError("relationships is not a list") + + # Validate relationships structure + validated_relationships = [] + for rel in relationships_data["relationships"]: + # Check for 'label' key + if not isinstance(rel, dict) or not all( + k in rel for k in ["from_abstraction", "to_abstraction", "label"] + ): + raise ValueError( + f"Missing keys (expected from_abstraction, to_abstraction, label) in relationship item: {rel}" + ) + # Validate 'label' is a string + if not isinstance(rel["label"], str): + raise ValueError(f"Relationship label is not a string: {rel}") + + # Validate indices + try: + from_idx = int(str(rel["from_abstraction"]).split("#")[0].strip()) + to_idx = int(str(rel["to_abstraction"]).split("#")[0].strip()) + if not ( + 0 <= from_idx < num_abstractions and 0 <= to_idx < num_abstractions + ): + raise ValueError( + f"Invalid index in relationship: from={from_idx}, to={to_idx}. 
Max index is {num_abstractions-1}." + ) + validated_relationships.append( + { + "from": from_idx, + "to": to_idx, + "label": rel["label"], # Potentially translated label + } + ) + except (ValueError, TypeError): + raise ValueError(f"Could not parse indices from relationship: {rel}") + + print("Generated project summary and relationship details.") + return { + "summary": relationships_data["summary"], # Potentially translated summary + "details": validated_relationships, # Store validated, index-based relationships with potentially translated labels + } + + def post(self, shared, prep_res, exec_res): + # Structure is now {"summary": str, "details": [{"from": int, "to": int, "label": str}]} + # Summary and label might be translated + shared["relationships"] = exec_res + + +class OrderChapters(Node): + def prep(self, shared): + abstractions = shared["abstractions"] # Name/description might be translated + relationships = shared["relationships"] # Summary/label might be translated + project_name = shared["project_name"] # Get project name + language = shared.get("language", "english") # Get language + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + + # Prepare context for the LLM + abstraction_info_for_prompt = [] + for i, a in enumerate(abstractions): + abstraction_info_for_prompt.append( + f"- {i} # {a['name']}" + ) # Use potentially translated name + abstraction_listing = "\n".join(abstraction_info_for_prompt) + + # Use potentially translated summary and labels + summary_note = "" + if language.lower() != "english": + summary_note = ( + f" (Note: Project Summary might be in {language.capitalize()})" + ) + + context = f"Project Summary{summary_note}:\n{relationships['summary']}\n\n" + context += "Relationships (Indices refer to abstractions above):\n" + for rel in relationships["details"]: + from_name = abstractions[rel["from"]]["name"] + to_name = abstractions[rel["to"]]["name"] + # Use potentially translated 'label' + context += f"- From {rel['from']} ({from_name}) to {rel['to']} ({to_name}): {rel['label']}\n" # Label might be translated + + list_lang_note = "" + if language.lower() != "english": + list_lang_note = f" (Names might be in {language.capitalize()})" + + return ( + abstraction_listing, + context, + len(abstractions), + project_name, + list_lang_note, + use_cache, + ) # Return use_cache + + def exec(self, prep_res): + ( + abstraction_listing, + context, + num_abstractions, + project_name, + list_lang_note, + use_cache, + ) = prep_res # Unpack use_cache + print("Determining chapter order using LLM...") + # No language variation needed here in prompt instructions, just ordering based on structure + # The input names might be translated, hence the note. + prompt = f""" +Given the following project abstractions and their relationships for the project ```` {project_name} ````: + +Abstractions (Index # Name){list_lang_note}: +{abstraction_listing} + +Context about relationships and project summary: +{context} + +If you are going to make a tutorial for ```` {project_name} ````, what is the best order to explain these abstractions, from first to last? +Ideally, first explain those that are the most important or foundational, perhaps user-facing concepts or entry points. Then move to more detailed, lower-level implementation details or supporting concepts. + +Output the ordered list of abstraction indices, including the name in a comment for clarity. Use the format `idx # AbstractionName`. 
+ +```yaml +- 2 # FoundationalConcept +- 0 # CoreClassA +- 1 # CoreClassB (uses CoreClassA) +- ... +``` + +Now, provide the YAML output: +""" + response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + + # --- Validation --- + yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() + ordered_indices_raw = yaml.safe_load(yaml_str) + + if not isinstance(ordered_indices_raw, list): + raise ValueError("LLM output is not a list") + + ordered_indices = [] + seen_indices = set() + for entry in ordered_indices_raw: + try: + if isinstance(entry, int): + idx = entry + elif isinstance(entry, str) and "#" in entry: + idx = int(entry.split("#")[0].strip()) + else: + idx = int(str(entry).strip()) + + if not (0 <= idx < num_abstractions): + raise ValueError( + f"Invalid index {idx} in ordered list. Max index is {num_abstractions-1}." + ) + if idx in seen_indices: + raise ValueError(f"Duplicate index {idx} found in ordered list.") + ordered_indices.append(idx) + seen_indices.add(idx) + + except (ValueError, TypeError): + raise ValueError( + f"Could not parse index from ordered list entry: {entry}" + ) + + # Check if all abstractions are included + if len(ordered_indices) != num_abstractions: + raise ValueError( + f"Ordered list length ({len(ordered_indices)}) does not match number of abstractions ({num_abstractions}). Missing indices: {set(range(num_abstractions)) - seen_indices}" + ) + + print(f"Determined chapter order (indices): {ordered_indices}") + return ordered_indices # Return the list of indices + + def post(self, shared, prep_res, exec_res): + # exec_res is already the list of ordered indices + shared["chapter_order"] = exec_res # List of indices + + +class WriteChapters(BatchNode): + def prep(self, shared): + chapter_order = shared["chapter_order"] # List of indices + abstractions = shared[ + "abstractions" + ] # List of {"name": str, "description": str, "files": [int]} + files_data = shared["files"] # List of (path, content) tuples + project_name = shared["project_name"] + language = shared.get("language", "english") + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + + # Get already written chapters to provide context + # We store them temporarily during the batch run, not in shared memory yet + # The 'previous_chapters_summary' will be built progressively in the exec context + self.chapters_written_so_far = ( + [] + ) # Use instance variable for temporary storage across exec calls + + # Create a complete list of all chapters + all_chapters = [] + chapter_filenames = {} # Store chapter filename mapping for linking + for i, abstraction_index in enumerate(chapter_order): + if 0 <= abstraction_index < len(abstractions): + chapter_num = i + 1 + chapter_name = abstractions[abstraction_index][ + "name" + ] # Potentially translated name + # Create safe filename (from potentially translated name) + safe_name = "".join( + c if c.isalnum() else "_" for c in chapter_name + ).lower() + filename = f"{i+1:02d}_{safe_name}.md" + # Format with link (using potentially translated name) + all_chapters.append(f"{chapter_num}. 
[{chapter_name}]({filename})") + # Store mapping of chapter index to filename for linking + chapter_filenames[abstraction_index] = { + "num": chapter_num, + "name": chapter_name, + "filename": filename, + } + + # Create a formatted string with all chapters + full_chapter_listing = "\n".join(all_chapters) + + items_to_process = [] + for i, abstraction_index in enumerate(chapter_order): + if 0 <= abstraction_index < len(abstractions): + abstraction_details = abstractions[ + abstraction_index + ] # Contains potentially translated name/desc + # Use 'files' (list of indices) directly + related_file_indices = abstraction_details.get("files", []) + # Get content using helper, passing indices + related_files_content_map = get_content_for_indices( + files_data, related_file_indices + ) + + # Get previous chapter info for transitions (uses potentially translated name) + prev_chapter = None + if i > 0: + prev_idx = chapter_order[i - 1] + prev_chapter = chapter_filenames[prev_idx] + + # Get next chapter info for transitions (uses potentially translated name) + next_chapter = None + if i < len(chapter_order) - 1: + next_idx = chapter_order[i + 1] + next_chapter = chapter_filenames[next_idx] + + items_to_process.append( + { + "chapter_num": i + 1, + "abstraction_index": abstraction_index, + "abstraction_details": abstraction_details, # Has potentially translated name/desc + "related_files_content_map": related_files_content_map, + "project_name": shared["project_name"], # Add project name + "full_chapter_listing": full_chapter_listing, # Add the full chapter listing (uses potentially translated names) + "chapter_filenames": chapter_filenames, # Add chapter filenames mapping (uses potentially translated names) + "prev_chapter": prev_chapter, # Add previous chapter info (uses potentially translated name) + "next_chapter": next_chapter, # Add next chapter info (uses potentially translated name) + "language": language, # Add language for multi-language support + "use_cache": use_cache, # Pass use_cache flag + # previous_chapters_summary will be added dynamically in exec + } + ) + else: + print( + f"Warning: Invalid abstraction index {abstraction_index} in chapter_order. Skipping." 
+ ) + + print(f"Preparing to write {len(items_to_process)} chapters...") + return items_to_process # Iterable for BatchNode + + def exec(self, item): + # This runs for each item prepared above + abstraction_name = item["abstraction_details"][ + "name" + ] # Potentially translated name + abstraction_description = item["abstraction_details"][ + "description" + ] # Potentially translated description + chapter_num = item["chapter_num"] + project_name = item.get("project_name") + language = item.get("language", "english") + use_cache = item.get("use_cache", True) # Read use_cache from item + print(f"Writing chapter {chapter_num} for: {abstraction_name} using LLM...") + + # Prepare file context string from the map + file_context_str = "\n\n".join( + f"--- File: {idx_path.split('# ')[1] if '# ' in idx_path else idx_path} ---\n{content}" + for idx_path, content in item["related_files_content_map"].items() + ) + + # Get summary of chapters written *before* this one + # Use the temporary instance variable + previous_chapters_summary = "\n---\n".join(self.chapters_written_so_far) + + # Add language instruction and context notes only if not English + language_instruction = "" + concept_details_note = "" + structure_note = "" + prev_summary_note = "" + instruction_lang_note = "" + mermaid_lang_note = "" + code_comment_note = "" + link_lang_note = "" + tone_note = "" + if language.lower() != "english": + lang_cap = language.capitalize() + language_instruction = f"IMPORTANT: Write this ENTIRE tutorial chapter in **{lang_cap}**. Some input context (like concept name, description, chapter list, previous summary) might already be in {lang_cap}, but you MUST translate ALL other generated content including explanations, examples, technical terms, and potentially code comments into {lang_cap}. DO NOT use English anywhere except in code syntax, required proper nouns, or when specified. The entire output MUST be in {lang_cap}.\n\n" + concept_details_note = f" (Note: Provided in {lang_cap})" + structure_note = f" (Note: Chapter names might be in {lang_cap})" + prev_summary_note = f" (Note: This summary might be in {lang_cap})" + instruction_lang_note = f" (in {lang_cap})" + mermaid_lang_note = f" (Use {lang_cap} for labels/text if appropriate)" + code_comment_note = f" (Translate to {lang_cap} if possible, otherwise keep minimal English for clarity)" + link_lang_note = ( + f" (Use the {lang_cap} chapter title from the structure above)" + ) + tone_note = f" (appropriate for {lang_cap} readers)" + + prompt = f""" +{language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}. + +Concept Details{concept_details_note}: +- Name: {abstraction_name} +- Description: +{abstraction_description} + +Complete Tutorial Structure{structure_note}: +{item["full_chapter_listing"]} + +Context from previous chapters{prev_summary_note}: +{previous_chapters_summary if previous_chapters_summary else "This is the first chapter."} + +Relevant Code Snippets (Code itself remains unchanged): +{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."} + +Instructions for the chapter (Generate content in {language.capitalize()} unless specified otherwise): +- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided concept name. 
+
+- If this is not the first chapter, begin with a brief transition from the previous chapter{instruction_lang_note}, referencing it with a proper Markdown link using its name{link_lang_note}.
+
+- Begin with a high-level motivation explaining what problem this abstraction solves{instruction_lang_note}. Start with a central use case as a concrete example. The whole chapter should guide the reader to understand how to solve this use case. Make it very minimal and friendly to beginners.
+
+- If the abstraction is complex, break it down into key concepts. Explain each concept one-by-one in a very beginner-friendly way{instruction_lang_note}.
+
+- Explain how to use this abstraction to solve the use case{instruction_lang_note}. Give example inputs and outputs for code snippets (if the output isn't values, describe at a high level what will happen{instruction_lang_note}).
+
+- Each code block should be BELOW 10 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggressively simplify the code to make it minimal. Use comments{code_comment_note} to skip non-important implementation details. Each code block should have a beginner-friendly explanation right after it{instruction_lang_note}.
+
+- Describe the internal implementation to help understand what's under the hood{instruction_lang_note}. First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called{instruction_lang_note}. It's recommended to use a simple sequenceDiagram with a dummy example - keep it minimal with at most 5 participants to ensure clarity. If participant name has space, use: `participant QP as Query Processing`. {mermaid_lang_note}.
+
+- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly. Explain{instruction_lang_note}.
+
+- IMPORTANT: When you need to refer to other core abstractions covered in other chapters, ALWAYS use proper Markdown links like this: [Chapter Title](filename.md). Use the Complete Tutorial Structure above to find the correct filename and the chapter title{link_lang_note}. Translate the surrounding text.
+
+- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format). {mermaid_lang_note}.
+
+- Heavily use analogies and examples throughout{instruction_lang_note} to help beginners understand.
+
+- End the chapter with a brief conclusion that summarizes what was learned{instruction_lang_note} and provides a transition to the next chapter{instruction_lang_note}. If there is a next chapter, use a proper Markdown link: [Next Chapter Title](next_chapter_filename){link_lang_note}.
+
+- Ensure the tone is welcoming and easy for a newcomer to understand{tone_note}.
+
+- Output *only* the Markdown content for this chapter.
+ +Now, directly provide a super beginner-friendly Markdown output (DON'T need ```markdown``` tags): +""" + chapter_content = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + # Basic validation/cleanup + actual_heading = f"# Chapter {chapter_num}: {abstraction_name}" # Use potentially translated name + if not chapter_content.strip().startswith(f"# Chapter {chapter_num}"): + # Add heading if missing or incorrect, trying to preserve content + lines = chapter_content.strip().split("\n") + if lines and lines[0].strip().startswith( + "#" + ): # If there's some heading, replace it + lines[0] = actual_heading + chapter_content = "\n".join(lines) + else: # Otherwise, prepend it + chapter_content = f"{actual_heading}\n\n{chapter_content}" + + # Add the generated content to our temporary list for the next iteration's context + self.chapters_written_so_far.append(chapter_content) + + return chapter_content # Return the Markdown string (potentially translated) + + def post(self, shared, prep_res, exec_res_list): + # exec_res_list contains the generated Markdown for each chapter, in order + shared["chapters"] = exec_res_list + # Clean up the temporary instance variable + del self.chapters_written_so_far + print(f"Finished writing {len(exec_res_list)} chapters.") + + +class CombineTutorial(Node): + def prep(self, shared): + project_name = shared["project_name"] + output_base_dir = shared.get("output_dir", "output") # Default output dir + output_path = os.path.join(output_base_dir, project_name) + repo_url = shared.get("repo_url") # Get the repository URL + # language = shared.get("language", "english") # No longer needed for fixed strings + + # Get potentially translated data + relationships_data = shared[ + "relationships" + ] # {"summary": str, "details": [{"from": int, "to": int, "label": str}]} -> summary/label potentially translated + chapter_order = shared["chapter_order"] # indices + abstractions = shared[ + "abstractions" + ] # list of dicts -> name/description potentially translated + chapters_content = shared[ + "chapters" + ] # list of strings -> content potentially translated + + # --- Generate Mermaid Diagram --- + mermaid_lines = ["flowchart TD"] + # Add nodes for each abstraction using potentially translated names + for i, abstr in enumerate(abstractions): + node_id = f"A{i}" + # Use potentially translated name, sanitize for Mermaid ID and label + sanitized_name = abstr["name"].replace('"', "") + node_label = sanitized_name # Using sanitized name only + mermaid_lines.append( + f' {node_id}["{node_label}"]' + ) # Node label uses potentially translated name + # Add edges for relationships using potentially translated labels + for rel in relationships_data["details"]: + from_node_id = f"A{rel['from']}" + to_node_id = f"A{rel['to']}" + # Use potentially translated label, sanitize + edge_label = ( + rel["label"].replace('"', "").replace("\n", " ") + ) # Basic sanitization + max_label_len = 30 + if len(edge_label) > max_label_len: + edge_label = edge_label[: max_label_len - 3] + "..." 
+ mermaid_lines.append( + f' {from_node_id} -- "{edge_label}" --> {to_node_id}' + ) # Edge label uses potentially translated label + + mermaid_diagram = "\n".join(mermaid_lines) + # --- End Mermaid --- + + # --- Prepare index.md content --- + index_content = f"# Tutorial: {project_name}\n\n" + index_content += f"{relationships_data['summary']}\n\n" # Use the potentially translated summary directly + # Keep fixed strings in English + index_content += f"**Source Repository:** [{repo_url}]({repo_url})\n\n" + + # Add Mermaid diagram for relationships (diagram itself uses potentially translated names/labels) + index_content += "```mermaid\n" + index_content += mermaid_diagram + "\n" + index_content += "```\n\n" + + # Keep fixed strings in English + index_content += f"## Chapters\n\n" + + chapter_files = [] + # Generate chapter links based on the determined order, using potentially translated names + for i, abstraction_index in enumerate(chapter_order): + # Ensure index is valid and we have content for it + if 0 <= abstraction_index < len(abstractions) and i < len(chapters_content): + abstraction_name = abstractions[abstraction_index][ + "name" + ] # Potentially translated name + # Sanitize potentially translated name for filename + safe_name = "".join( + c if c.isalnum() else "_" for c in abstraction_name + ).lower() + filename = f"{i+1:02d}_{safe_name}.md" + index_content += f"{i+1}. [{abstraction_name}]({filename})\n" # Use potentially translated name in link text + + # Add attribution to chapter content (using English fixed string) + chapter_content = chapters_content[i] # Potentially translated content + if not chapter_content.endswith("\n\n"): + chapter_content += "\n\n" + # Keep fixed strings in English + chapter_content += f"---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)" + + # Store filename and corresponding content + chapter_files.append({"filename": filename, "content": chapter_content}) + else: + print( + f"Warning: Mismatch between chapter order, abstractions, or content at index {i} (abstraction index {abstraction_index}). Skipping file generation for this entry." + ) + + # Add attribution to index content (using English fixed string) + index_content += f"\n\n---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)" + + return { + "output_path": output_path, + "index_content": index_content, + "chapter_files": chapter_files, # List of {"filename": str, "content": str} + } + + def exec(self, prep_res): + output_path = prep_res["output_path"] + index_content = prep_res["index_content"] + chapter_files = prep_res["chapter_files"] + + print(f"Combining tutorial into directory: {output_path}") + # Rely on Node's built-in retry/fallback + os.makedirs(output_path, exist_ok=True) + + # Write index.md + index_filepath = os.path.join(output_path, "index.md") + with open(index_filepath, "w", encoding="utf-8") as f: + f.write(index_content) + print(f" - Wrote {index_filepath}") + + # Write chapter files + for chapter_info in chapter_files: + chapter_filepath = os.path.join(output_path, chapter_info["filename"]) + with open(chapter_filepath, "w", encoding="utf-8") as f: + f.write(chapter_info["content"]) + print(f" - Wrote {chapter_filepath}") + + return output_path # Return the final path + + def post(self, shared, prep_res, exec_res): + shared["final_output_dir"] = exec_res # Store the output path + print(f"\nTutorial generation complete! 
Files are in: {exec_res}") diff --git a/nodes.py.backup2 b/nodes.py.backup2 new file mode 100644 index 00000000..91a9adac --- /dev/null +++ b/nodes.py.backup2 @@ -0,0 +1,919 @@ +import os +import re +import yaml +from pocketflow import Node, BatchNode +from utils.crawl_github_files import crawl_github_files +from utils.call_llm import call_llm +from utils.crawl_local_files import crawl_local_files + + +# Helper to get content for specific file indices +def get_content_for_indices(files_data, indices): + content_map = {} + for i in indices: + if 0 <= i < len(files_data): + path, content = files_data[i] + content_map[f"{i} # {path}"] = ( + content # Use index + path as key for context + ) + return content_map + + +class FetchRepo(Node): + def prep(self, shared): + repo_url = shared.get("repo_url") + local_dir = shared.get("local_dir") + project_name = shared.get("project_name") + + if not project_name: + # Basic name derivation from URL or directory + if repo_url: + project_name = repo_url.split("/")[-1].replace(".git", "") + else: + project_name = os.path.basename(os.path.abspath(local_dir)) + shared["project_name"] = project_name + + # Get file patterns directly from shared + include_patterns = shared["include_patterns"] + exclude_patterns = shared["exclude_patterns"] + max_file_size = shared["max_file_size"] + + return { + "repo_url": repo_url, + "local_dir": local_dir, + "token": shared.get("github_token"), + "include_patterns": include_patterns, + "exclude_patterns": exclude_patterns, + "max_file_size": max_file_size, + "use_relative_paths": True, + } + + def exec(self, prep_res): + if prep_res["repo_url"]: + print(f"Crawling repository: {prep_res['repo_url']}...") + result = crawl_github_files( + repo_url=prep_res["repo_url"], + token=prep_res["token"], + include_patterns=prep_res["include_patterns"], + exclude_patterns=prep_res["exclude_patterns"], + max_file_size=prep_res["max_file_size"], + use_relative_paths=prep_res["use_relative_paths"], + ) + else: + print(f"Crawling directory: {prep_res['local_dir']}...") + + result = crawl_local_files( + directory=prep_res["local_dir"], + include_patterns=prep_res["include_patterns"], + exclude_patterns=prep_res["exclude_patterns"], + max_file_size=prep_res["max_file_size"], + use_relative_paths=prep_res["use_relative_paths"] + ) + + # Convert dict to list of tuples: [(path, content), ...] 
+ files_list = list(result.get("files", {}).items()) + if len(files_list) == 0: + raise (ValueError("Failed to fetch files")) + print(f"Fetched {len(files_list)} files.") + return files_list + + def post(self, shared, prep_res, exec_res): + shared["files"] = exec_res # List of (path, content) tuples + + +class IdentifyAbstractions(Node): + def prep(self, shared): + files_data = shared["files"] + project_name = shared["project_name"] # Get project name + language = shared.get("language", "english") # Get language + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + max_abstraction_num = shared.get("max_abstraction_num", 10) # Get max_abstraction_num, default to 10 + + # Helper to create context from files, respecting limits (basic example) + def create_llm_context(files_data): + context = "" + file_info = [] # Store tuples of (index, path) + for i, (path, content) in enumerate(files_data): + entry = f"--- File Index {i}: {path} ---\n{content}\n\n" + context += entry + file_info.append((i, path)) + + return context, file_info # file_info is list of (index, path) + + context, file_info = create_llm_context(files_data) + # Format file info for the prompt (comment is just a hint for LLM) + file_listing_for_prompt = "\n".join( + [f"- {idx} # {path}" for idx, path in file_info] + ) + return ( + context, + file_listing_for_prompt, + len(files_data), + project_name, + language, + use_cache, + max_abstraction_num, + files_data, + ) # Return all parameters + + def exec(self, prep_res): + ( + context, + file_listing_for_prompt, + file_count, + project_name, + language, + use_cache, + max_abstraction_num, + files_data, + ) = prep_res # Unpack all parameters + print(f"Identifying abstractions using LLM...") + + # Add language instruction and hints only if not English + language_instruction = "" + name_lang_hint = "" + desc_lang_hint = "" + if language.lower() != "english": + language_instruction = f"IMPORTANT: Generate the `name` and `description` for each abstraction in **{language.capitalize()}** language. Do NOT use English for these fields.\n\n" + # Keep specific hints here as name/description are primary targets + name_lang_hint = f" (value in {language.capitalize()})" + desc_lang_hint = f" (value in {language.capitalize()})" + + prompt = f""" +For the project `{project_name}`: + +Codebase Context: +{context} + +{language_instruction}Analyze the codebase context. +Identify the top 5-{max_abstraction_num} core most important abstractions to help those new to the codebase. + +For each abstraction, provide: +1. A concise `name`{name_lang_hint}. +2. A beginner-friendly `description` explaining what it is with a simple analogy, in around 100 words{desc_lang_hint}. +3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`. + +List of file indices and paths present in the context: +{file_listing_for_prompt} + +Format the output as a YAML list of dictionaries: + +```yaml +- name: | + Query Processing{name_lang_hint} + description: | + Explains what the abstraction does. + It's like a central dispatcher routing requests.{desc_lang_hint} + file_indices: + - 0 # path/to/file1.py + - 3 # path/to/related.py +- name: | + Query Optimization{name_lang_hint} + description: | + Another core concept, similar to a blueprint for objects.{desc_lang_hint} + file_indices: + - 5 # path/to/another.js +# ... 
up to {max_abstraction_num} abstractions +```""" + # Build context from most important files (first 5 files by size/relevance) + context_files = sorted( + [(path, content) for path, content in files_data], + key=lambda x: len(x[1]), + reverse=True + )[:5] + + rag_context = "\n\n".join([ + f"=== {path} ===\n{content[:2000]}" # First 2000 chars per file + for path, content in context_files + ]) + + from utils.call_llm import call_llm_with_context + response = call_llm_with_context( + prompt=prompt, + context=rag_context, + use_cache=(use_cache and self.cur_retry == 0) + ) + + # --- Validation --- + yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() + abstractions = yaml.safe_load(yaml_str) + + if not isinstance(abstractions, list): + raise ValueError("LLM Output is not a list") + + validated_abstractions = [] + for item in abstractions: + if not isinstance(item, dict) or not all( + k in item for k in ["name", "description", "file_indices"] + ): + raise ValueError(f"Missing keys in abstraction item: {item}") + if not isinstance(item["name"], str): + raise ValueError(f"Name is not a string in item: {item}") + if not isinstance(item["description"], str): + raise ValueError(f"Description is not a string in item: {item}") + if not isinstance(item["file_indices"], list): + raise ValueError(f"file_indices is not a list in item: {item}") + + # Validate indices + validated_indices = [] + for idx_entry in item["file_indices"]: + try: + if isinstance(idx_entry, int): + idx = idx_entry + elif isinstance(idx_entry, str) and "#" in idx_entry: + idx = int(idx_entry.split("#")[0].strip()) + else: + idx = int(str(idx_entry).strip()) + + if not (0 <= idx < file_count): + raise ValueError( + f"Invalid file index {idx} found in item {item['name']}. Max index is {file_count - 1}." 
+ ) + validated_indices.append(idx) + except (ValueError, TypeError): + raise ValueError( + f"Could not parse index from entry: {idx_entry} in item {item['name']}" + ) + + item["files"] = sorted(list(set(validated_indices))) + # Store only the required fields + validated_abstractions.append( + { + "name": item["name"], # Potentially translated name + "description": item[ + "description" + ], # Potentially translated description + "files": item["files"], + } + ) + + print(f"Identified {len(validated_abstractions)} abstractions.") + return validated_abstractions + + def post(self, shared, prep_res, exec_res): + shared["abstractions"] = ( + exec_res # List of {"name": str, "description": str, "files": [int]} + ) + + +class AnalyzeRelationships(Node): + def prep(self, shared): + abstractions = shared[ + "abstractions" + ] # Now contains 'files' list of indices, name/description potentially translated + files_data = shared["files"] + project_name = shared["project_name"] # Get project name + language = shared.get("language", "english") # Get language + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + + # Get the actual number of abstractions directly + num_abstractions = len(abstractions) + + # Create context with abstraction names, indices, descriptions, and relevant file snippets + context = "Identified Abstractions:\\n" + all_relevant_indices = set() + abstraction_info_for_prompt = [] + for i, abstr in enumerate(abstractions): + # Use 'files' which contains indices directly + file_indices_str = ", ".join(map(str, abstr["files"])) + # Abstraction name and description might be translated already + info_line = f"- Index {i}: {abstr['name']} (Relevant file indices: [{file_indices_str}])\\n Description: {abstr['description']}" + context += info_line + "\\n" + abstraction_info_for_prompt.append( + f"{i} # {abstr['name']}" + ) # Use potentially translated name here too + all_relevant_indices.update(abstr["files"]) + + context += "\\nRelevant File Snippets (Referenced by Index and Path):\\n" + # Get content for relevant files using helper + relevant_files_content_map = get_content_for_indices( + files_data, sorted(list(all_relevant_indices)) + ) + # Format file content for context + file_context_str = "\\n\\n".join( + f"--- File: {idx_path} ---\\n{content}" + for idx_path, content in relevant_files_content_map.items() + ) + context += file_context_str + + return ( + context, + "\n".join(abstraction_info_for_prompt), + num_abstractions, # Pass the actual count + project_name, + language, + use_cache, + ) # Return use_cache + + def exec(self, prep_res): + ( + context, + abstraction_listing, + num_abstractions, # Receive the actual count + project_name, + language, + use_cache, + ) = prep_res # Unpack use_cache + print(f"Analyzing relationships using LLM...") + + # Add language instruction and hints only if not English + language_instruction = "" + lang_hint = "" + list_lang_note = "" + if language.lower() != "english": + language_instruction = f"IMPORTANT: Generate the `summary` and relationship `label` fields in **{language.capitalize()}** language. 
Do NOT use English for these fields.\n\n" + lang_hint = f" (in {language.capitalize()})" + list_lang_note = f" (Names might be in {language.capitalize()})" # Note for the input list + + prompt = f""" +Based on the following abstractions and relevant code snippets from the project `{project_name}`: + +List of Abstraction Indices and Names{list_lang_note}: +{abstraction_listing} + +Context (Abstractions, Descriptions, Code): +{context} + +{language_instruction}Please provide: +1. A high-level `summary` of the project's main purpose and functionality in a few beginner-friendly sentences{lang_hint}. Use markdown formatting with **bold** and *italic* text to highlight important concepts. +2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify: + - `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`) + - `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`) + - `label`: A brief label for the interaction **in just a few words**{lang_hint} (e.g., "Manages", "Inherits", "Uses"). + Ideally the relationship should be backed by one abstraction calling or passing parameters to another. + Simplify the relationship and exclude those non-important ones. + +IMPORTANT: Make sure EVERY abstraction is involved in at least ONE relationship (either as source or target). Each abstraction index must appear at least once across all relationships. + +Format the output as YAML: + +```yaml +summary: | + A brief, simple explanation of the project{lang_hint}. + Can span multiple lines with **bold** and *italic* for emphasis. +relationships: + - from_abstraction: 0 # AbstractionName1 + to_abstraction: 1 # AbstractionName2 + label: "Manages"{lang_hint} + - from_abstraction: 2 # AbstractionName3 + to_abstraction: 0 # AbstractionName1 + label: "Provides config"{lang_hint} + # ... 
other relationships +``` + +Now, provide the YAML output: +""" + # Use already gathered context from prep (relevant_files_content_map has the files) + # But we can add a focused snippet for relationship analysis + from utils.call_llm import call_llm_with_context + + # Context is already built in prep via file_context_str + # We'll pass it as additional context + response = call_llm_with_context( + prompt=prompt, + context="", # Context already in prompt, no need to duplicate + use_cache=(use_cache and self.cur_retry == 0) + ) + + # --- Validation --- + yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() + relationships_data = yaml.safe_load(yaml_str) + + if not isinstance(relationships_data, dict) or not all( + k in relationships_data for k in ["summary", "relationships"] + ): + raise ValueError( + "LLM output is not a dict or missing keys ('summary', 'relationships')" + ) + if not isinstance(relationships_data["summary"], str): + raise ValueError("summary is not a string") + if not isinstance(relationships_data["relationships"], list): + raise ValueError("relationships is not a list") + + # Validate relationships structure + validated_relationships = [] + for rel in relationships_data["relationships"]: + # Check for 'label' key + if not isinstance(rel, dict) or not all( + k in rel for k in ["from_abstraction", "to_abstraction", "label"] + ): + raise ValueError( + f"Missing keys (expected from_abstraction, to_abstraction, label) in relationship item: {rel}" + ) + # Validate 'label' is a string + if not isinstance(rel["label"], str): + raise ValueError(f"Relationship label is not a string: {rel}") + + # Validate indices + try: + from_idx = int(str(rel["from_abstraction"]).split("#")[0].strip()) + to_idx = int(str(rel["to_abstraction"]).split("#")[0].strip()) + if not ( + 0 <= from_idx < num_abstractions and 0 <= to_idx < num_abstractions + ): + raise ValueError( + f"Invalid index in relationship: from={from_idx}, to={to_idx}. Max index is {num_abstractions-1}." 
+ ) + validated_relationships.append( + { + "from": from_idx, + "to": to_idx, + "label": rel["label"], # Potentially translated label + } + ) + except (ValueError, TypeError): + raise ValueError(f"Could not parse indices from relationship: {rel}") + + print("Generated project summary and relationship details.") + return { + "summary": relationships_data["summary"], # Potentially translated summary + "details": validated_relationships, # Store validated, index-based relationships with potentially translated labels + } + + def post(self, shared, prep_res, exec_res): + # Structure is now {"summary": str, "details": [{"from": int, "to": int, "label": str}]} + # Summary and label might be translated + shared["relationships"] = exec_res + + +class OrderChapters(Node): + def prep(self, shared): + abstractions = shared["abstractions"] # Name/description might be translated + relationships = shared["relationships"] # Summary/label might be translated + project_name = shared["project_name"] # Get project name + language = shared.get("language", "english") # Get language + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + + # Prepare context for the LLM + abstraction_info_for_prompt = [] + for i, a in enumerate(abstractions): + abstraction_info_for_prompt.append( + f"- {i} # {a['name']}" + ) # Use potentially translated name + abstraction_listing = "\n".join(abstraction_info_for_prompt) + + # Use potentially translated summary and labels + summary_note = "" + if language.lower() != "english": + summary_note = ( + f" (Note: Project Summary might be in {language.capitalize()})" + ) + + context = f"Project Summary{summary_note}:\n{relationships['summary']}\n\n" + context += "Relationships (Indices refer to abstractions above):\n" + for rel in relationships["details"]: + from_name = abstractions[rel["from"]]["name"] + to_name = abstractions[rel["to"]]["name"] + # Use potentially translated 'label' + context += f"- From {rel['from']} ({from_name}) to {rel['to']} ({to_name}): {rel['label']}\n" # Label might be translated + + list_lang_note = "" + if language.lower() != "english": + list_lang_note = f" (Names might be in {language.capitalize()})" + + return ( + abstraction_listing, + context, + len(abstractions), + project_name, + list_lang_note, + use_cache, + ) # Return use_cache + + def exec(self, prep_res): + ( + abstraction_listing, + context, + num_abstractions, + project_name, + list_lang_note, + use_cache, + ) = prep_res # Unpack use_cache + print("Determining chapter order using LLM...") + # No language variation needed here in prompt instructions, just ordering based on structure + # The input names might be translated, hence the note. + prompt = f""" +Given the following project abstractions and their relationships for the project ```` {project_name} ````: + +Abstractions (Index # Name){list_lang_note}: +{abstraction_listing} + +Context about relationships and project summary: +{context} + +If you are going to make a tutorial for ```` {project_name} ````, what is the best order to explain these abstractions, from first to last? +Ideally, first explain those that are the most important or foundational, perhaps user-facing concepts or entry points. Then move to more detailed, lower-level implementation details or supporting concepts. + +Output the ordered list of abstraction indices, including the name in a comment for clarity. Use the format `idx # AbstractionName`. + +```yaml +- 2 # FoundationalConcept +- 0 # CoreClassA +- 1 # CoreClassB (uses CoreClassA) +- ... 
+``` + +Now, provide the YAML output: +""" + response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + + # --- Validation --- + yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() + ordered_indices_raw = yaml.safe_load(yaml_str) + + if not isinstance(ordered_indices_raw, list): + raise ValueError("LLM output is not a list") + + ordered_indices = [] + seen_indices = set() + for entry in ordered_indices_raw: + try: + if isinstance(entry, int): + idx = entry + elif isinstance(entry, str) and "#" in entry: + idx = int(entry.split("#")[0].strip()) + else: + idx = int(str(entry).strip()) + + if not (0 <= idx < num_abstractions): + raise ValueError( + f"Invalid index {idx} in ordered list. Max index is {num_abstractions-1}." + ) + if idx in seen_indices: + raise ValueError(f"Duplicate index {idx} found in ordered list.") + ordered_indices.append(idx) + seen_indices.add(idx) + + except (ValueError, TypeError): + raise ValueError( + f"Could not parse index from ordered list entry: {entry}" + ) + + # Check if all abstractions are included + if len(ordered_indices) != num_abstractions: + raise ValueError( + f"Ordered list length ({len(ordered_indices)}) does not match number of abstractions ({num_abstractions}). Missing indices: {set(range(num_abstractions)) - seen_indices}" + ) + + print(f"Determined chapter order (indices): {ordered_indices}") + return ordered_indices # Return the list of indices + + def post(self, shared, prep_res, exec_res): + # exec_res is already the list of ordered indices + shared["chapter_order"] = exec_res # List of indices + + +class WriteChapters(BatchNode): + def prep(self, shared): + chapter_order = shared["chapter_order"] # List of indices + abstractions = shared[ + "abstractions" + ] # List of {"name": str, "description": str, "files": [int]} + files_data = shared["files"] # List of (path, content) tuples + project_name = shared["project_name"] + language = shared.get("language", "english") + use_cache = shared.get("use_cache", True) # Get use_cache flag, default to True + + # Get already written chapters to provide context + # We store them temporarily during the batch run, not in shared memory yet + # The 'previous_chapters_summary' will be built progressively in the exec context + self.chapters_written_so_far = ( + [] + ) # Use instance variable for temporary storage across exec calls + + # Create a complete list of all chapters + all_chapters = [] + chapter_filenames = {} # Store chapter filename mapping for linking + for i, abstraction_index in enumerate(chapter_order): + if 0 <= abstraction_index < len(abstractions): + chapter_num = i + 1 + chapter_name = abstractions[abstraction_index][ + "name" + ] # Potentially translated name + # Create safe filename (from potentially translated name) + safe_name = "".join( + c if c.isalnum() else "_" for c in chapter_name + ).lower() + filename = f"{i+1:02d}_{safe_name}.md" + # Format with link (using potentially translated name) + all_chapters.append(f"{chapter_num}. 
[{chapter_name}]({filename})") + # Store mapping of chapter index to filename for linking + chapter_filenames[abstraction_index] = { + "num": chapter_num, + "name": chapter_name, + "filename": filename, + } + + # Create a formatted string with all chapters + full_chapter_listing = "\n".join(all_chapters) + + items_to_process = [] + for i, abstraction_index in enumerate(chapter_order): + if 0 <= abstraction_index < len(abstractions): + abstraction_details = abstractions[ + abstraction_index + ] # Contains potentially translated name/desc + # Use 'files' (list of indices) directly + related_file_indices = abstraction_details.get("files", []) + # Get content using helper, passing indices + related_files_content_map = get_content_for_indices( + files_data, related_file_indices + ) + + # Get previous chapter info for transitions (uses potentially translated name) + prev_chapter = None + if i > 0: + prev_idx = chapter_order[i - 1] + prev_chapter = chapter_filenames[prev_idx] + + # Get next chapter info for transitions (uses potentially translated name) + next_chapter = None + if i < len(chapter_order) - 1: + next_idx = chapter_order[i + 1] + next_chapter = chapter_filenames[next_idx] + + items_to_process.append( + { + "chapter_num": i + 1, + "abstraction_index": abstraction_index, + "abstraction_details": abstraction_details, # Has potentially translated name/desc + "related_files_content_map": related_files_content_map, + "project_name": shared["project_name"], # Add project name + "full_chapter_listing": full_chapter_listing, # Add the full chapter listing (uses potentially translated names) + "chapter_filenames": chapter_filenames, # Add chapter filenames mapping (uses potentially translated names) + "prev_chapter": prev_chapter, # Add previous chapter info (uses potentially translated name) + "next_chapter": next_chapter, # Add next chapter info (uses potentially translated name) + "language": language, # Add language for multi-language support + "use_cache": use_cache, # Pass use_cache flag + # previous_chapters_summary will be added dynamically in exec + } + ) + else: + print( + f"Warning: Invalid abstraction index {abstraction_index} in chapter_order. Skipping." 
+ ) + + print(f"Preparing to write {len(items_to_process)} chapters...") + return items_to_process # Iterable for BatchNode + + def exec(self, item): + # This runs for each item prepared above + abstraction_name = item["abstraction_details"][ + "name" + ] # Potentially translated name + abstraction_description = item["abstraction_details"][ + "description" + ] # Potentially translated description + chapter_num = item["chapter_num"] + project_name = item.get("project_name") + language = item.get("language", "english") + use_cache = item.get("use_cache", True) # Read use_cache from item + print(f"Writing chapter {chapter_num} for: {abstraction_name} using LLM...") + + # Prepare file context string from the map + file_context_str = "\n\n".join( + f"--- File: {idx_path.split('# ')[1] if '# ' in idx_path else idx_path} ---\n{content}" + for idx_path, content in item["related_files_content_map"].items() + ) + + # Get summary of chapters written *before* this one + # Use the temporary instance variable + previous_chapters_summary = "\n---\n".join(self.chapters_written_so_far) + + # Add language instruction and context notes only if not English + language_instruction = "" + concept_details_note = "" + structure_note = "" + prev_summary_note = "" + instruction_lang_note = "" + mermaid_lang_note = "" + code_comment_note = "" + link_lang_note = "" + tone_note = "" + if language.lower() != "english": + lang_cap = language.capitalize() + language_instruction = f"IMPORTANT: Write this ENTIRE tutorial chapter in **{lang_cap}**. Some input context (like concept name, description, chapter list, previous summary) might already be in {lang_cap}, but you MUST translate ALL other generated content including explanations, examples, technical terms, and potentially code comments into {lang_cap}. DO NOT use English anywhere except in code syntax, required proper nouns, or when specified. The entire output MUST be in {lang_cap}.\n\n" + concept_details_note = f" (Note: Provided in {lang_cap})" + structure_note = f" (Note: Chapter names might be in {lang_cap})" + prev_summary_note = f" (Note: This summary might be in {lang_cap})" + instruction_lang_note = f" (in {lang_cap})" + mermaid_lang_note = f" (Use {lang_cap} for labels/text if appropriate)" + code_comment_note = f" (Translate to {lang_cap} if possible, otherwise keep minimal English for clarity)" + link_lang_note = ( + f" (Use the {lang_cap} chapter title from the structure above)" + ) + tone_note = f" (appropriate for {lang_cap} readers)" + + prompt = f""" +{language_instruction}Write a very beginner-friendly tutorial chapter (in Markdown format) for the project `{project_name}` about the concept: "{abstraction_name}". This is Chapter {chapter_num}. + +Concept Details{concept_details_note}: +- Name: {abstraction_name} +- Description: +{abstraction_description} + +Complete Tutorial Structure{structure_note}: +{item["full_chapter_listing"]} + +Context from previous chapters{prev_summary_note}: +{previous_chapters_summary if previous_chapters_summary else "This is the first chapter."} + +Relevant Code Snippets (Code itself remains unchanged): +{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."} + +Instructions for the chapter (Generate content in {language.capitalize()} unless specified otherwise): +- Start with a clear heading (e.g., `# Chapter {chapter_num}: {abstraction_name}`). Use the provided concept name. 
+
+- If this is not the first chapter, begin with a brief transition from the previous chapter{instruction_lang_note}, referencing it with a proper Markdown link using its name{link_lang_note}.
+
+- Begin with a high-level motivation explaining what problem this abstraction solves{instruction_lang_note}. Start with a central use case as a concrete example. The whole chapter should guide the reader to understand how to solve this use case. Make it very minimal and friendly to beginners.
+
+- If the abstraction is complex, break it down into key concepts. Explain each concept one-by-one in a very beginner-friendly way{instruction_lang_note}.
+
+- Explain how to use this abstraction to solve the use case{instruction_lang_note}. Give example inputs and outputs for code snippets (if the output isn't values, describe at a high level what will happen{instruction_lang_note}).
+
+- Each code block should be BELOW 10 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggressively simplify the code to make it minimal. Use comments{code_comment_note} to skip non-important implementation details. Each code block should have a beginner-friendly explanation right after it{instruction_lang_note}.
+
+- Describe the internal implementation to help understand what's under the hood{instruction_lang_note}. First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called{instruction_lang_note}. It's recommended to use a simple sequenceDiagram with a dummy example - keep it minimal with at most 5 participants to ensure clarity. If a participant name has a space, use: `participant QP as Query Processing`. {mermaid_lang_note}.
+
+- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly. Explain{instruction_lang_note}.
+
+- IMPORTANT: When you need to refer to other core abstractions covered in other chapters, ALWAYS use proper Markdown links like this: [Chapter Title](filename.md). Use the Complete Tutorial Structure above to find the correct filename and the chapter title{link_lang_note}. Translate the surrounding text.
+
+- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format). {mermaid_lang_note}.
+
+- Heavily use analogies and examples throughout{instruction_lang_note} to help beginners understand.
+
+- End the chapter with a brief conclusion that summarizes what was learned{instruction_lang_note} and provides a transition to the next chapter{instruction_lang_note}. If there is a next chapter, use a proper Markdown link: [Next Chapter Title](next_chapter_filename){link_lang_note}.
+
+- Ensure the tone is welcoming and easy for a newcomer to understand{tone_note}.
+
+- Output *only* the Markdown content for this chapter.
+ +Now, directly provide a super beginner-friendly Markdown output (DON'T need ```markdown``` tags): +""" + from utils.call_llm import call_llm_with_context + + # Context from related files is already in file_context_str + # Pass it explicitly as RAG context for better separation + response = call_llm_with_context( + prompt=prompt, + context=file_context_str if file_context_str else "", + use_cache=(use_cache and self.cur_retry == 0) + ) + + chapter_content = response + # Basic validation/cleanup + actual_heading = f"# Chapter {chapter_num}: {abstraction_name}" # Use potentially translated name + if not chapter_content.strip().startswith(f"# Chapter {chapter_num}"): + # Add heading if missing or incorrect, trying to preserve content + lines = chapter_content.strip().split("\n") + if lines and lines[0].strip().startswith( + "#" + ): # If there's some heading, replace it + lines[0] = actual_heading + chapter_content = "\n".join(lines) + else: # Otherwise, prepend it + chapter_content = f"{actual_heading}\n\n{chapter_content}" + + # Add the generated content to our temporary list for the next iteration's context + self.chapters_written_so_far.append(chapter_content) + + return chapter_content # Return the Markdown string (potentially translated) + + def post(self, shared, prep_res, exec_res_list): + # exec_res_list contains the generated Markdown for each chapter, in order + shared["chapters"] = exec_res_list + # Clean up the temporary instance variable + del self.chapters_written_so_far + print(f"Finished writing {len(exec_res_list)} chapters.") + + +class CombineTutorial(Node): + def prep(self, shared): + project_name = shared["project_name"] + output_base_dir = shared.get("output_dir", "output") # Default output dir + output_path = os.path.join(output_base_dir, project_name) + repo_url = shared.get("repo_url") # Get the repository URL + # language = shared.get("language", "english") # No longer needed for fixed strings + + # Get potentially translated data + relationships_data = shared[ + "relationships" + ] # {"summary": str, "details": [{"from": int, "to": int, "label": str}]} -> summary/label potentially translated + chapter_order = shared["chapter_order"] # indices + abstractions = shared[ + "abstractions" + ] # list of dicts -> name/description potentially translated + chapters_content = shared[ + "chapters" + ] # list of strings -> content potentially translated + + # --- Generate Mermaid Diagram --- + mermaid_lines = ["flowchart TD"] + # Add nodes for each abstraction using potentially translated names + for i, abstr in enumerate(abstractions): + node_id = f"A{i}" + # Use potentially translated name, sanitize for Mermaid ID and label + sanitized_name = abstr["name"].replace('"', "") + node_label = sanitized_name # Using sanitized name only + mermaid_lines.append( + f' {node_id}["{node_label}"]' + ) # Node label uses potentially translated name + # Add edges for relationships using potentially translated labels + for rel in relationships_data["details"]: + from_node_id = f"A{rel['from']}" + to_node_id = f"A{rel['to']}" + # Use potentially translated label, sanitize + edge_label = ( + rel["label"].replace('"', "").replace("\n", " ") + ) # Basic sanitization + max_label_len = 30 + if len(edge_label) > max_label_len: + edge_label = edge_label[: max_label_len - 3] + "..." 
+ mermaid_lines.append( + f' {from_node_id} -- "{edge_label}" --> {to_node_id}' + ) # Edge label uses potentially translated label + + mermaid_diagram = "\n".join(mermaid_lines) + # --- End Mermaid --- + + # --- Prepare index.md content --- + index_content = f"# Tutorial: {project_name}\n\n" + index_content += f"{relationships_data['summary']}\n\n" # Use the potentially translated summary directly + # Keep fixed strings in English + index_content += f"**Source Repository:** [{repo_url}]({repo_url})\n\n" + + # Add Mermaid diagram for relationships (diagram itself uses potentially translated names/labels) + index_content += "```mermaid\n" + index_content += mermaid_diagram + "\n" + index_content += "```\n\n" + + # Keep fixed strings in English + index_content += f"## Chapters\n\n" + + chapter_files = [] + # Generate chapter links based on the determined order, using potentially translated names + for i, abstraction_index in enumerate(chapter_order): + # Ensure index is valid and we have content for it + if 0 <= abstraction_index < len(abstractions) and i < len(chapters_content): + abstraction_name = abstractions[abstraction_index][ + "name" + ] # Potentially translated name + # Sanitize potentially translated name for filename + safe_name = "".join( + c if c.isalnum() else "_" for c in abstraction_name + ).lower() + filename = f"{i+1:02d}_{safe_name}.md" + index_content += f"{i+1}. [{abstraction_name}]({filename})\n" # Use potentially translated name in link text + + # Add attribution to chapter content (using English fixed string) + chapter_content = chapters_content[i] # Potentially translated content + if not chapter_content.endswith("\n\n"): + chapter_content += "\n\n" + # Keep fixed strings in English + chapter_content += f"---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)" + + # Store filename and corresponding content + chapter_files.append({"filename": filename, "content": chapter_content}) + else: + print( + f"Warning: Mismatch between chapter order, abstractions, or content at index {i} (abstraction index {abstraction_index}). Skipping file generation for this entry." + ) + + # Add attribution to index content (using English fixed string) + index_content += f"\n\n---\n\nGenerated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)" + + return { + "output_path": output_path, + "index_content": index_content, + "chapter_files": chapter_files, # List of {"filename": str, "content": str} + } + + def exec(self, prep_res): + output_path = prep_res["output_path"] + index_content = prep_res["index_content"] + chapter_files = prep_res["chapter_files"] + + print(f"Combining tutorial into directory: {output_path}") + # Rely on Node's built-in retry/fallback + os.makedirs(output_path, exist_ok=True) + + # Write index.md + index_filepath = os.path.join(output_path, "index.md") + with open(index_filepath, "w", encoding="utf-8") as f: + f.write(index_content) + print(f" - Wrote {index_filepath}") + + # Write chapter files + for chapter_info in chapter_files: + chapter_filepath = os.path.join(output_path, chapter_info["filename"]) + with open(chapter_filepath, "w", encoding="utf-8") as f: + f.write(chapter_info["content"]) + print(f" - Wrote {chapter_filepath}") + + return output_path # Return the final path + + def post(self, shared, prep_res, exec_res): + shared["final_output_dir"] = exec_res # Store the output path + print(f"\nTutorial generation complete! 
Files are in: {exec_res}") diff --git a/utils/call_llm.py b/utils/call_llm.py index 70c9e83a..fc437f18 100644 --- a/utils/call_llm.py +++ b/utils/call_llm.py @@ -158,6 +158,83 @@ def call_llm(prompt: str, use_cache: bool = True) -> str: return response_text + + +def get_open_webui_context(prompt: str, collection_name: str = "#csharpdocs") -> str: + """ + Retrieve knowledge from Open WebUI RAG server. + """ + api_key = os.getenv("OPEN_WEBUI_API_KEY") + base_url = os.getenv("OPEN_WEBUI_ENDPOINT", "http://localhost:3000") + + if not api_key: + logger.warning("OPEN_WEBUI_API_KEY not set. Skipping remote RAG.") + return "" + + url = f"{base_url}/api/chat/completions" + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" + } + + # We use the prompt with the collection hash to trigger RAG + # We ask the model to "Provide relevant information about: ..." + search_prompt = f"{collection_name} Provide relevant information / documentation context for the following task/query: {prompt}" + + payload = { + "model": os.getenv("OLLAMA_MODEL", "qwen3:8b"), # Default to model in env + "messages": [{"role": "user", "content": search_prompt}], + "stream": False + } + + try: + logger.info(f"Querying Open WebUI RAG: {search_prompt[:50]}...") + response = requests.post(url, headers=headers, json=payload, timeout=300) # Longer timeout for RAG + response.raise_for_status() + result = response.json() + content = result["choices"][0]["message"]["content"] + logger.info(f"RAG Retrieval successful. Length: {len(content)}") + return content + except Exception as e: + logger.error(f"Open WebUI RAG Query failed: {e}") + return "" + + +def call_llm_with_context(prompt: str, context: str = "", use_cache: bool = True, include_remote_rag: bool = False) -> str: + """ + Call LLM with optional RAG context (Local + Remote) injected into the prompt. 
+ + Args: + prompt: The main question/instruction for the LLM + context: Additional code snippets or relevant information (Local files) + use_cache: Whether to cache the LLM response + include_remote_rag: Whether to fetch context from Open WebUI RAG server + + Returns: + str: LLM response text + """ + rag_context = context + + if include_remote_rag: + remote_knowledge = get_open_webui_context(prompt) + if remote_knowledge: + rag_context += f"\n\n### Remote Library Knowledge (.NET/C# Docs):\n{remote_knowledge}" + + if rag_context: + # Build enhanced prompt with context + full_prompt = f"""### Relevant Code/Library Context: +{rag_context} + +### Task: +{prompt} + +Use the code context above to provide accurate, specific answers.""" + else: + full_prompt = prompt + + return call_llm(full_prompt, use_cache) + + def _call_llm_gemini(prompt: str) -> str: if os.getenv("GEMINI_PROJECT_ID"): client = genai.Client( diff --git a/utils/call_llm.py.backup b/utils/call_llm.py.backup new file mode 100644 index 00000000..70c9e83a --- /dev/null +++ b/utils/call_llm.py.backup @@ -0,0 +1,185 @@ +from google import genai +import os +import logging +import json +import requests +from datetime import datetime + +# Configure logging +log_directory = os.getenv("LOG_DIR", "logs") +os.makedirs(log_directory, exist_ok=True) +log_file = os.path.join( + log_directory, f"llm_calls_{datetime.now().strftime('%Y%m%d')}.log" +) + +# Set up logger +logger = logging.getLogger("llm_logger") +logger.setLevel(logging.INFO) +logger.propagate = False # Prevent propagation to root logger +file_handler = logging.FileHandler(log_file, encoding='utf-8') +file_handler.setFormatter( + logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") +) +logger.addHandler(file_handler) + +# Simple cache configuration +cache_file = "llm_cache.json" + + +def load_cache(): + try: + with open(cache_file, 'r') as f: + return json.load(f) + except: + logger.warning(f"Failed to load cache.") + return {} + + +def save_cache(cache): + try: + with open(cache_file, 'w') as f: + json.dump(cache, f) + except: + logger.warning(f"Failed to save cache") + + +def get_llm_provider(): + provider = os.getenv("LLM_PROVIDER") + if not provider and (os.getenv("GEMINI_PROJECT_ID") or os.getenv("GEMINI_API_KEY")): + provider = "GEMINI" + # if necessary, add ANTHROPIC/OPENAI + return provider + + +def _call_llm_provider(prompt: str) -> str: + """ + Call an LLM provider based on environment variables. + Environment variables: + - LLM_PROVIDER: "OLLAMA" or "XAI" + - _MODEL: Model name (e.g., OLLAMA_MODEL, XAI_MODEL) + - _BASE_URL: Base URL without endpoint (e.g., OLLAMA_BASE_URL, XAI_BASE_URL) + - _API_KEY: API key (e.g., OLLAMA_API_KEY, XAI_API_KEY; optional for providers that don't require it) + The endpoint /v1/chat/completions will be appended to the base URL. 
+ """ + logger.info(f"PROMPT: {prompt}") # log the prompt + + # Read the provider from environment variable + provider = os.environ.get("LLM_PROVIDER") + if not provider: + raise ValueError("LLM_PROVIDER environment variable is required") + + # Construct the names of the other environment variables + model_var = f"{provider}_MODEL" + base_url_var = f"{provider}_BASE_URL" + api_key_var = f"{provider}_API_KEY" + + # Read the provider-specific variables + model = os.environ.get(model_var) + base_url = os.environ.get(base_url_var) + api_key = os.environ.get(api_key_var, "") # API key is optional, default to empty string + + # Validate required variables + if not model: + raise ValueError(f"{model_var} environment variable is required") + if not base_url: + raise ValueError(f"{base_url_var} environment variable is required") + + # Append the endpoint to the base URL + url = f"{base_url.rstrip('/')}/v1/chat/completions" + + # Configure headers and payload based on provider + headers = { + "Content-Type": "application/json", + } + if api_key: # Only add Authorization header if API key is provided + headers["Authorization"] = f"Bearer {api_key}" + + payload = { + "model": model, + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.7, + } + + try: + response = requests.post(url, headers=headers, json=payload) + response_json = response.json() # Log the response + logger.info("RESPONSE:\n%s", json.dumps(response_json, indent=2)) + #logger.info(f"RESPONSE: {response.json()}") + response.raise_for_status() + return response.json()["choices"][0]["message"]["content"] + except requests.exceptions.HTTPError as e: + error_message = f"HTTP error occurred: {e}" + try: + error_details = response.json().get("error", "No additional details") + error_message += f" (Details: {error_details})" + except: + pass + raise Exception(error_message) + except requests.exceptions.ConnectionError: + raise Exception(f"Failed to connect to {provider} API. Check your network connection.") + except requests.exceptions.Timeout: + raise Exception(f"Request to {provider} API timed out.") + except requests.exceptions.RequestException as e: + raise Exception(f"An error occurred while making the request to {provider}: {e}") + except ValueError: + raise Exception(f"Failed to parse response as JSON from {provider}. The server might have returned an invalid response.") + +# By default, we Google Gemini 2.5 pro, as it shows great performance for code understanding +def call_llm(prompt: str, use_cache: bool = True) -> str: + # Log the prompt + logger.info(f"PROMPT: {prompt}") + + # Check cache if enabled + if use_cache: + # Load cache from disk + cache = load_cache() + # Return from cache if exists + if prompt in cache: + logger.info(f"RESPONSE: {cache[prompt]}") + return cache[prompt] + + provider = get_llm_provider() + if provider == "GEMINI": + response_text = _call_llm_gemini(prompt) + else: # generic method using a URL that is OpenAI compatible API (Ollama, ...) 
+ response_text = _call_llm_provider(prompt) + + # Log the response + logger.info(f"RESPONSE: {response_text}") + + # Update cache if enabled + if use_cache: + # Load cache again to avoid overwrites + cache = load_cache() + # Add to cache and save + cache[prompt] = response_text + save_cache(cache) + + return response_text + + +def _call_llm_gemini(prompt: str) -> str: + if os.getenv("GEMINI_PROJECT_ID"): + client = genai.Client( + vertexai=True, + project=os.getenv("GEMINI_PROJECT_ID"), + location=os.getenv("GEMINI_LOCATION", "us-central1") + ) + elif os.getenv("GEMINI_API_KEY"): + client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) + else: + raise ValueError("Either GEMINI_PROJECT_ID or GEMINI_API_KEY must be set in the environment") + model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25") + response = client.models.generate_content( + model=model, + contents=[prompt] + ) + return response.text + +if __name__ == "__main__": + test_prompt = "Hello, how are you?" + + # First call - should hit the API + print("Making call...") + response1 = call_llm(test_prompt, use_cache=False) + print(f"Response: {response1}") From 01b125bcc792b48509aff6aed83a37c800d34099 Mon Sep 17 00:00:00 2001 From: motaz m alharbi Date: Sun, 14 Dec 2025 23:53:04 +0200 Subject: [PATCH 2/2] Add Remote RAG integration for open webui --- README.md | 25 +++++++++++++ nodes.py | 8 ++++- utils/call_llm.py | 91 +++++++++++++++++++++++++++++++++++++---------- 3 files changed, 105 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index cc8ad4e8..ba0a6071 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,31 @@ To run this project in a Docker container, you'll need to pass your API keys as ``` +
+ +
+  New! Remote RAG Integration (Open WebUI)
+
+**Supercharge your tutorials with external knowledge!**
+
+We've added support for **Remote RAG** (Retrieval-Augmented Generation). This lets the tutorial generator pull in your private documentation or the latest tech specs (for example .NET 10 or C# 14) that aren't in the codebase yet.
+
+**Capabilities:**
+- **Hybrid Analysis**: Combines local code analysis with your remote knowledge base.
+- **Always Up to Date**: Tutorials reference the latest documentation from your Open WebUI server.
+- **Plug & Play**: Set your credentials in `.env` and it runs automatically alongside the standard analysis.
+
+**Setup:**
+Add to your `.env`:
+```bash
+# Open WebUI Configuration
+OPEN_WEBUI_ENDPOINT=http://localhost:3000
+OPEN_WEBUI_JWT_TOKEN=your_token_here        # OPEN_WEBUI_API_KEY is accepted as an alternative
+OPEN_WEBUI_COLLECTION=your_collection_here  # falls back to "csharpdocs" if unset
+```
+
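+For orientation, here is a minimal sketch of how the feature is consumed from Python once the variables above are set. It uses `call_llm_with_context` from `utils/call_llm.py` (the same helper the nodes call); the question text is only a placeholder, and it assumes `python-dotenv` is installed, as in the module's own test block.
+
+```python
+# Minimal usage sketch for Remote RAG (assumes the .env values above are set).
+from dotenv import load_dotenv
+from utils.call_llm import call_llm_with_context
+
+load_dotenv()
+
+response = call_llm_with_context(
+    prompt="Summarize how file-based programs work in .NET 10.",  # placeholder question
+    context="",                 # local code snippets would go here
+    use_cache=False,
+    include_remote_rag=True,    # triggers the Open WebUI retrieval step
+)
+print(response[:500])
+```
+
+Under the hood, `get_open_webui_context()` resolves the collection name to an ID via `/api/v1/knowledge/`, then sends a chat completion with the `files` parameter so retrieval runs server-side (using the model in `OLLAMA_MODEL`, default `qwen3:8b`); the retrieved text is prepended to the prompt before the normal `call_llm()` path and its cache take over.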
+ ## 💡 Development Tutorial - I built using [**Agentic Coding**](https://zacharyhuang.substack.com/p/agentic-coding-the-most-fun-way-to), the fastest development paradigm, where humans simply [design](docs/design.md) and agents [code](flow.py). diff --git a/nodes.py b/nodes.py index 8f6692b7..82ff580f 100644 --- a/nodes.py +++ b/nodes.py @@ -517,7 +517,13 @@ def exec(self, prep_res): Now, provide the YAML output: """ - response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying + from utils.call_llm import call_llm_with_context + response = call_llm_with_context( + prompt=prompt, + context="", # Context is already inside the prompt + use_cache=(use_cache and self.cur_retry == 0), + include_remote_rag=True + ) # --- Validation --- yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip() diff --git a/utils/call_llm.py b/utils/call_llm.py index fc437f18..241f0a14 100644 --- a/utils/call_llm.py +++ b/utils/call_llm.py @@ -160,46 +160,93 @@ def call_llm(prompt: str, use_cache: bool = True) -> str: -def get_open_webui_context(prompt: str, collection_name: str = "#csharpdocs") -> str: + + +def get_open_webui_context( + prompt: str, + collection_name: str = "csharpdocs", + jwt_token: str = None +) -> str: """ - Retrieve knowledge from Open WebUI RAG server. + Query Open WebUI knowledge collection with RAG enabled. + The RAG retrieval happens server-side in Open WebUI using the 'files' parameter or proper knowledge retrieval. """ - api_key = os.getenv("OPEN_WEBUI_API_KEY") - base_url = os.getenv("OPEN_WEBUI_ENDPOINT", "http://localhost:3000") + # 1. Setup Auth and Config + if not jwt_token: + # Try JWT token first, fallback to API Key (which is often a Bearer token anyway) + jwt_token = os.getenv("OPEN_WEBUI_JWT_TOKEN") or os.getenv("OPEN_WEBUI_API_KEY") - if not api_key: - logger.warning("OPEN_WEBUI_API_KEY not set. Skipping remote RAG.") + if not jwt_token: + logger.warning("OPEN_WEBUI_JWT_TOKEN/API_KEY not set. Skipping RAG.") return "" + + # Allow overriding collection name from env + collection_name = os.getenv("OPEN_WEBUI_COLLECTION", collection_name) - url = f"{base_url}/api/chat/completions" + base_url = os.getenv("OPEN_WEBUI_ENDPOINT", "http://localhost:3000") headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" + 'Authorization': f'Bearer {jwt_token}', + 'Content-Type': 'application/json' } - - # We use the prompt with the collection hash to trigger RAG - # We ask the model to "Provide relevant information about: ..." - search_prompt = f"{collection_name} Provide relevant information / documentation context for the following task/query: {prompt}" + + # 2. Get Collection ID from Name + collection_id = None + try: + collections_url = f"{base_url}/api/v1/knowledge/" + # Use short timeout for list + resp = requests.get(collections_url, headers=headers, timeout=10) + resp.raise_for_status() + collections = resp.json() + + for col in collections: + if col.get('name', '').lower() == collection_name.lower(): + collection_id = col['id'] + break + + if not collection_id: + logger.warning(f"Collection '{collection_name}' not found in Open WebUI. Returning empty RAG context.") + return "" + + logger.info(f"Found Knowledge Collection: {collection_name} -> {collection_id}") + + except Exception as e: + logger.error(f"Failed to lookup Knowledge Collection ID: {e}") + return "" + + # 3. 
Query Chat Completions with RAG context + # This triggers the server-side RAG engine because we pass the 'files' parameter + chat_url = f"{base_url}/api/chat/completions" payload = { - "model": os.getenv("OLLAMA_MODEL", "qwen3:8b"), # Default to model in env - "messages": [{"role": "user", "content": search_prompt}], + "model": os.getenv("OLLAMA_MODEL", "qwen3:8b"), + "messages": [ + {"role": "user", "content": prompt} + ], + "files": [ + { + "type": "collection", + "id": collection_id + } + ], "stream": False } try: - logger.info(f"Querying Open WebUI RAG: {search_prompt[:50]}...") - response = requests.post(url, headers=headers, json=payload, timeout=300) # Longer timeout for RAG + logger.info(f"Querying Open WebUI RAG for: '{prompt[:50]}...'") + response = requests.post(chat_url, headers=headers, json=payload, timeout=300) # 5 min timeout for RAG response.raise_for_status() result = response.json() + content = result["choices"][0]["message"]["content"] - logger.info(f"RAG Retrieval successful. Length: {len(content)}") + logger.info(f"RAG Retrieval Successful. Response Length: {len(content)}") return content + except Exception as e: logger.error(f"Open WebUI RAG Query failed: {e}") return "" + def call_llm_with_context(prompt: str, context: str = "", use_cache: bool = True, include_remote_rag: bool = False) -> str: """ Call LLM with optional RAG context (Local + Remote) injected into the prompt. @@ -234,6 +281,14 @@ def call_llm_with_context(prompt: str, context: str = "", use_cache: bool = True return call_llm(full_prompt, use_cache) +if __name__ == "__main__": + # Test block as requested + from dotenv import load_dotenv + load_dotenv() + print("Testing get_open_webui_context...") + res = get_open_webui_context("file-based programs .NET 10") + print(f"Retrieved: {res[:200]}") + def _call_llm_gemini(prompt: str) -> str: if os.getenv("GEMINI_PROJECT_ID"):