25 changes: 25 additions & 0 deletions README.md
@@ -155,6 +155,31 @@ To run this project in a Docker container, you'll need to pass your API keys as
```
</details>

<details>

<summary> <b>New! Remote RAG Integration (Open WebUI)</b> </summary>

**Supercharge your tutorials with external knowledge!**

We've added support for **Remote RAG** (Retrieval-Augmented Generation). This allows PocketFlow to access your private documentation or the latest tech specs (like .NET 10, C# 14) that aren't in the codebase yet.

**Capabilities:**
- **Hybrid Analysis**: Combines local code analysis with your remote Knowledge Base.
- **Always Up-to-Date**: Tutorials reference the latest documentation from your Open WebUI server.
- **Plug & Play**: Just set your credentials in `.env`, and it works automatically alongside the standard analysis (see the sketch below).
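
The nodes in this PR route their prompts through the `call_llm_with_context` helper in `utils/call_llm.py` (see the `nodes.py` changes below). Here is a minimal usage sketch; the file-gathering step and the prompt are simplified for illustration, and the real nodes pull files from the shared pipeline state instead of reading them directly:

```python
from utils.call_llm import call_llm_with_context

# Local code context: a single file truncated to 2,000 characters,
# mirroring the truncation used in nodes.py (illustrative only).
with open("flow.py", "r", encoding="utf-8") as f:
    rag_context = f"=== flow.py ===\n{f.read()[:2000]}"

# include_remote_rag=True additionally queries the Open WebUI knowledge base
# configured via the .env settings described under "Setup" below.
response = call_llm_with_context(
    prompt="Identify the key abstractions in this codebase.",
    context=rag_context,
    use_cache=True,
    include_remote_rag=True,
)
print(response)
```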

**Setup:**
Add to your `.env`:
```bash
# Open WebUI Configuration
OPEN_WEBUI_ENDPOINT=http://localhost:3000
OPEN_WEBUI_JWT_TOKEN=your_token_here
OPEN_WEBUI_COLLECTION=your_collection_here
```
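
The retrieval helper itself lives in `utils/call_llm.py` and is not part of this diff. The sketch below is one way such a helper could consume these settings; the function name `fetch_remote_context`, the endpoint path, and the request/response shapes are assumptions rather than Open WebUI's documented API, so adjust them to match your server version.

```python
import os
import requests

OPEN_WEBUI_ENDPOINT = os.getenv("OPEN_WEBUI_ENDPOINT", "http://localhost:3000")
OPEN_WEBUI_JWT_TOKEN = os.getenv("OPEN_WEBUI_JWT_TOKEN", "")
OPEN_WEBUI_COLLECTION = os.getenv("OPEN_WEBUI_COLLECTION", "")

def fetch_remote_context(query: str) -> str:
    """Fetch supporting text from the Open WebUI knowledge base (sketch only).

    The URL path and JSON shapes below are placeholders; check your
    Open WebUI version's retrieval API before relying on them.
    """
    if not (OPEN_WEBUI_JWT_TOKEN and OPEN_WEBUI_COLLECTION):
        return ""  # Remote RAG not configured: fall back to local-only analysis
    resp = requests.post(
        f"{OPEN_WEBUI_ENDPOINT}/api/v1/retrieval/query/collection",  # assumed path
        headers={"Authorization": f"Bearer {OPEN_WEBUI_JWT_TOKEN}"},
        json={"collection_name": OPEN_WEBUI_COLLECTION, "query": query},
        timeout=30,
    )
    resp.raise_for_status()
    # Assumed response shape: {"documents": [{"content": "..."}]}
    docs = resp.json().get("documents", [])
    return "\n\n".join(doc.get("content", "") for doc in docs)
```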
</details>

## 💡 Development Tutorial

- I built this using [**Agentic Coding**](https://zacharyhuang.substack.com/p/agentic-coding-the-most-fun-way-to), the fastest development paradigm, where humans simply [design](docs/design.md) and agents [code](flow.py).
56 changes: 52 additions & 4 deletions nodes.py
@@ -113,6 +113,7 @@ def create_llm_context(files_data):
language,
use_cache,
max_abstraction_num,
files_data,
) # Return all parameters

def exec(self, prep_res):
@@ -124,6 +125,7 @@ def exec(self, prep_res):
language,
use_cache,
max_abstraction_num,
files_data,
) = prep_res # Unpack all parameters
print(f"Identifying abstractions using LLM...")

@@ -173,7 +175,25 @@ def exec(self, prep_res):
- 5 # path/to/another.js
# ... up to {max_abstraction_num} abstractions
```"""
response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying
# Build context from the 5 largest files (content length as a rough importance proxy)
context_files = sorted(
    files_data,
    key=lambda x: len(x[1]),
    reverse=True,
)[:5]

rag_context = "\n\n".join([
f"=== {path} ===\n{content[:2000]}" # First 2000 chars per file
for path, content in context_files
])

from utils.call_llm import call_llm_with_context
response = call_llm_with_context(
prompt=prompt,
context=rag_context,
use_cache=(use_cache and self.cur_retry == 0),
include_remote_rag=True
)

# --- Validation ---
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
@@ -344,7 +364,18 @@ def exec(self, prep_res):

Now, provide the YAML output:
"""
response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying
# The relevant file snippets were already gathered in prep and embedded in the
# prompt via file_context_str, so no extra local context is passed here.
from utils.call_llm import call_llm_with_context

response = call_llm_with_context(
    prompt=prompt,
    context="",  # Local context is already inside the prompt
    use_cache=(use_cache and self.cur_retry == 0),
    include_remote_rag=True,
)

# --- Validation ---
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
@@ -486,7 +517,13 @@ def exec(self, prep_res):

Now, provide the YAML output:
"""
response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying
from utils.call_llm import call_llm_with_context
response = call_llm_with_context(
prompt=prompt,
context="", # Context is already inside the prompt
use_cache=(use_cache and self.cur_retry == 0),
include_remote_rag=True
)

# --- Validation ---
yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
@@ -723,7 +760,18 @@ def exec(self, item):

Now, directly provide a super beginner-friendly Markdown output (DON'T need ```markdown``` tags):
"""
chapter_content = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0)) # Use cache only if enabled and not retrying
from utils.call_llm import call_llm_with_context

# Context from related files is already in file_context_str
# Pass it explicitly as RAG context for better separation
chapter_content = call_llm_with_context(
    prompt=prompt,
    context=file_context_str or "",
    use_cache=(use_cache and self.cur_retry == 0),
    include_remote_rag=True,
)

# Basic validation/cleanup
actual_heading = f"# Chapter {chapter_num}: {abstraction_name}" # Use potentially translated name
if not chapter_content.strip().startswith(f"# Chapter {chapter_num}"):