karpathy · jinchenglee · Feb 11, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
diff --git a/.gitignore b/.gitignore
@@ -9,6 +9,5 @@ wheels/
 # Virtual environments
 .venv
 
-# Custom
-*_data/
-*.epub
+# Books
+books/
diff --git a/README.md b/README.md
@@ -2,26 +2,52 @@
 
 ![reader3](reader3.png)
 
-A lightweight, self-hosted EPUB reader that lets you read through EPUB books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along.
+A lightweight, self-hosted EPUB/PDF reader that lets you read through EPUB/PDF books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub/pdf books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along.
 
-This project was 90% vibe coded just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438). I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like.
+This project was originally 90% vibe coded by Mr. Karpathy, just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438).
+
+After branching off, I added PDF support and a chat pane that behaves much like the Gemini side panel in the Chrome browser. Currently, it only supports automatically sending the selected text as a query to a remote LLM of your choice. This could be a starting point for some interesting ideas, such as how to manage prompts and context so that the LLM is more effective.
 
 ## Usage
 
-The project uses [uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then:
+The project uses [uv](https://docs.astral.sh/uv/). All books (source files and processed data) live in the `books/` subdirectory. For example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345), then:
+
+```bash
+uv run reader3.py ~/Downloads/dracula.epub
+```
+
+This creates the directory `books/dracula_data`, which registers the book to your local library.
+
+Similarly, you can also import PDF files. Just run the same command on a `.pdf` file:
+
+```bash
+uv run reader3.py ~/Downloads/mydocument.pdf
+```
+
+### Testing
+
+Run the integration test suite to verify the application:
 
 ```bash
-uv run reader3.py dracula.epub
+uv run pytest 
 ```
 
-This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server:
+### Running the Server
+
+We can then run the server:
 
 ```bash
 uv run server.py
 ```
 
-And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex.
+To stop the server:
+
+```bash
+uv run stop_server.py
+```
+
+While the server is running, visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting their folder under `books/`. It's not supposed to be complicated or complex.
 
 ## License
 
-MIT
+MIT
diff --git a/annotations.py b/annotations.py
@@ -0,0 +1,104 @@
+import os
+import json
+import uuid
+from datetime import datetime
+from typing import List, Optional, Literal
+from pydantic import BaseModel, Field
+
+# --- Data Models ---
+
+class AnnotationTarget(BaseModel):
+    # Where an annotation is anchored. Only chapter_index is required; the
+    # EPUB and PDF locator fields below all default to None.
+    chapter_index: int
+    # For EPUB:
+    # NOTE(review): "cfi" presumably means an EPUB Canonical Fragment
+    # Identifier and "quote" the selected text -- confirm against the client.
+    cfi: Optional[str] = None 
+    quote: Optional[str] = None
+    # For PDF:
+    # NOTE(review): rect is presumably a bounding box such as
+    # [x0, y0, x1, y1]; its length and units are not validated here.
+    page_num: Optional[int] = None
+    rect: Optional[List[float]] = None 
+
+class ChatMessage(BaseModel):
+    # One message of a chat thread stored inside an annotation.
+    # role is a free-form string (no set of allowed values is enforced here);
+    # content is the message text.
+    role: str
+    content: str
+
+class AnnotationContent(BaseModel):
+    # Payload of an annotation. Every field is optional and defaults to None,
+    # so the model itself does not enforce which fields go with which
+    # annotation type.
+    text: Optional[str] = None  # Markdown string for notes
+    color: Optional[str] = None # e.g. "#ffff00"
+    # Ordered chat messages; presumably populated for 'chat_thread'
+    # annotations -- confirm against the caller.
+    chat_messages: Optional[List[ChatMessage]] = None
+
+class Annotation(BaseModel):
+    # A single stored annotation: what kind it is, where it is anchored
+    # (target) and what it carries (content).
+    # id defaults to a freshly generated random UUID4 rendered as a string.
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    # created_at defaults to the current UTC time as an ISO-8601 string
+    # WITHOUT a timezone offset (datetime.utcnow() returns a naive datetime).
+    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
+    # moving to an aware timestamp would change the stored string format.
+    created_at: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
+    # Only these three literal values pass validation.
+    type: Literal['highlight', 'note', 'chat_thread']
+    target: AnnotationTarget
+    content: AnnotationContent
+
+# --- Storage Logic ---
+
+def _get_annotations_path(books_dir: str, book_id: str) -> str:
+    """Return the path of the annotations.json file inside a book's folder."""
+    book_folder = os.path.join(books_dir, book_id)
+    return os.path.join(book_folder, "annotations.json")
+
+def load_annotations(books_dir: str, book_id: str) -> List[Annotation]:
+    """Read every stored annotation of a book.
+
+    Returns an empty list when the book has no annotations file. Any error
+    while reading, parsing or validating the file is printed and also
+    results in an empty list (best-effort read; nothing is raised).
+    """
+    annotations_file = _get_annotations_path(books_dir, book_id)
+    if os.path.exists(annotations_file):
+        try:
+            with open(annotations_file, "r", encoding="utf-8") as handle:
+                records = json.load(handle)
+                return [Annotation(**record) for record in records]
+        except Exception as e:
+            print(f"Error loading annotations for {book_id}: {e}")
+
+    # Either the file is missing or it could not be loaded.
+    return []
+
+def save_annotation_to_disk(books_dir: str, book_id: str, new_annotation: Annotation):
+    """Append ``new_annotation`` to the book's annotations.json file.
+
+    The book folder is created if it does not exist yet.
+
+    Raises:
+        Exception: any error while reading the existing file or writing the
+            new one is printed and re-raised. In particular, an existing file
+            that cannot be parsed or validated aborts the save instead of
+            being overwritten.
+    """
+    path = _get_annotations_path(books_dir, book_id)
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    try:
+        # Read the existing file strictly here rather than through
+        # load_annotations(): that helper returns [] on any read/parse error,
+        # and writing back "[] + new annotation" would silently destroy every
+        # annotation stored in a damaged file.
+        if os.path.exists(path):
+            with open(path, "r", encoding="utf-8") as f:
+                annotations = [Annotation(**item) for item in json.load(f)]
+        else:
+            annotations = []
+        annotations.append(new_annotation)
+
+        # Write to a sibling temp file and swap it in, so a failure halfway
+        # through the dump cannot leave a truncated annotations.json behind.
+        tmp_path = path + ".tmp"
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            # dumping model_dump(mode='json') handles datetime/uuid serialization
+            json.dump([a.model_dump(mode='json') for a in annotations], f, indent=2)
+        os.replace(tmp_path, path)
+    except Exception as e:
+        print(f"Error saving annotation for {book_id}: {e}")
+        raise
+
+def delete_annotation_from_disk(books_dir: str, book_id: str, annotation_id: str):
+    """Remove the annotation whose id equals ``annotation_id``.
+
+    Returns:
+        True when an annotation was removed and the file rewritten; False
+        when no loaded annotation has that id (the file is left untouched).
+
+    Raises:
+        Exception: any error while writing the file is printed and re-raised.
+    """
+    annotations = load_annotations(books_dir, book_id)
+    remaining = [a for a in annotations if a.id != annotation_id]
+
+    if len(remaining) == len(annotations):
+        return False  # ID not found
+
+    path = _get_annotations_path(books_dir, book_id)
+    # Write to a sibling temp file and swap it in, so a failure halfway
+    # through the dump cannot leave a truncated annotations.json behind.
+    tmp_path = path + ".tmp"
+    try:
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump([a.model_dump(mode='json') for a in remaining], f, indent=2)
+        os.replace(tmp_path, path)
+        return True
+    except Exception as e:
+        print(f"Error deleting annotation for {book_id}: {e}")
+        raise
+
+def update_annotation_in_disk(books_dir: str, book_id: str, updated_annotation: Annotation):
+    """Replace the stored annotation that has the same id as ``updated_annotation``.
+
+    Returns:
+        True when a matching annotation was replaced and the file rewritten;
+        False when no loaded annotation has that id (the file is left
+        untouched).
+
+    Raises:
+        Exception: any error while writing the file is printed and re-raised.
+    """
+    annotations = load_annotations(books_dir, book_id)
+    found = False
+    for i, a in enumerate(annotations):
+        if a.id == updated_annotation.id:
+            annotations[i] = updated_annotation
+            found = True
+            break  # only the first match is replaced
+
+    if not found:
+        return False
+
+    path = _get_annotations_path(books_dir, book_id)
+    # Write to a sibling temp file and swap it in, so a failure halfway
+    # through the dump cannot leave a truncated annotations.json behind.
+    tmp_path = path + ".tmp"
+    try:
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump([a.model_dump(mode='json') for a in annotations], f, indent=2)
+        os.replace(tmp_path, path)
+        return True
+    except Exception as e:
+        print(f"Error updating annotation for {book_id}: {e}")
+        raise
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,9 +5,14 @@ description = "Simple EPUB reader web app"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
+    "anthropic>=0.79.0",
     "beautifulsoup4>=4.14.2",
     "ebooklib>=0.20",
     "fastapi>=0.121.2",
+    "httpx>=0.28.1",
     "jinja2>=3.1.6",
+    "openai>=2.20.0",
+    "pydantic>=2.12.4",
+    "pymupdf>=1.27.1",
     "uvicorn>=0.38.0",
 ]
diff --git a/reader3.png b/reader3.png
diff --git a/reader3.py b/reader3.py
@@ -13,6 +13,7 @@
 import ebooklib
 from ebooklib import epub
 from bs4 import BeautifulSoup, Comment
+import fitz # PyMuPDF
 
 # --- Data structures ---
 
@@ -283,6 +284,52 @@ def process_epub(epub_path: str, output_dir: str) -> Book:
     return final_book
 
 
+def process_pdf(pdf_path: str, output_dir: str) -> Book:
+    """
+    Extracts metadata from a PDF and returns a Book object.
+    Does NOT convert pages to HTML/Images.
+
+    Side effects: ``output_dir`` is deleted if it already exists, recreated,
+    and the PDF is copied into it as ``original.pdf``.
+
+    The returned Book has an empty spine, toc and image map; its
+    ``source_file`` is the relative name ``"original.pdf"``.
+    """
+    print(f"Processing PDF {pdf_path}...")
+
+    # 1. Load PDF
+    # The document is only needed for its metadata, so close the handle as
+    # soon as that has been read instead of leaving it open.
+    with fitz.open(pdf_path) as doc:
+        # 2. Extract Metadata
+        # PyMuPDF may report None instead of a dict (e.g. for an encrypted
+        # document that has not been authenticated); fall back to {} so the
+        # .get() calls below cannot fail.
+        meta = doc.metadata or {}
+
+    # Fallback title: file name without its extension. splitext removes only
+    # the final suffix and works for any letter case (".PDF"), unlike
+    # replacing every ".pdf" substring.
+    fallback_title = os.path.splitext(os.path.basename(pdf_path))[0]
+
+    # Comma-separated keywords -> clean list without blank entries.
+    keywords = meta.get('keywords') or ''
+    subjects = [kw.strip() for kw in keywords.split(',') if kw.strip()]
+
+    # PyMuPDF metadata keys: format, title, author, subject, keywords, creator, producer, creationDate, modDate
+    metadata = BookMetadata(
+        title=meta.get('title') or fallback_title,
+        language="en", # default
+        authors=[meta.get('author')] if meta.get('author') else [],
+        description=meta.get('subject'),
+        publisher=meta.get('producer'),
+        date=meta.get('creationDate'),
+        identifiers=[],
+        subjects=subjects
+    )
+
+    # 3. Create Output Directory
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Copy the PDF to the output directory so the book folder is self-contained
+    shutil.copy2(pdf_path, os.path.join(output_dir, "original.pdf"))
+
+    final_book = Book(
+        metadata=metadata,
+        spine=[],
+        toc=[],
+        images={},
+        source_file="original.pdf", # We will look for this in server
+        processed_at=datetime.now().isoformat()
+    )
+
+    return final_book
+
+
 def save_to_pickle(book: Book, output_dir: str):
     p_path = os.path.join(output_dir, 'book.pkl')
     with open(p_path, 'wb') as f:
@@ -301,9 +348,19 @@ def save_to_pickle(book: Book, output_dir: str):
 
     epub_file = sys.argv[1]
     assert os.path.exists(epub_file), "File not found."
-    out_dir = os.path.splitext(epub_file)[0] + "_data"
-
-    book_obj = process_epub(epub_file, out_dir)
+
+    # Sanitize the directory name
+    original_base_name = os.path.splitext(os.path.basename(epub_file))[0]
+    safe_base_name = "".join([c for c in original_base_name if c.isalnum() or c in '._-']).strip()
+    base_name = safe_base_name + "_data"
+
+    out_dir = os.path.join("books", base_name)
+    os.makedirs("books", exist_ok=True)
+
+    if epub_file.lower().endswith('.pdf'):
+        book_obj = process_pdf(epub_file, out_dir)
+    else:
+        book_obj = process_epub(epub_file, out_dir)
     save_to_pickle(book_obj, out_dir)
     print("\n--- Summary ---")
     print(f"Title: {book_obj.metadata.title}")
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,5 @@ wheels/ @@
     # Virtual environments
     .venv
-    # Custom
-    *_data/
-    *.epub
+    # Books
+    books/