Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,5 @@ wheels/
# Virtual environments
.venv

# Custom
*_data/
*.epub
# Books
books/
40 changes: 33 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,52 @@

![reader3](reader3.png)

A lightweight, self-hosted EPUB reader that lets you read through EPUB books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along.
A lightweight, self-hosted EPUB/PDF reader that lets you read through EPUB/PDF books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub/pdf books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along.

This project was 90% vibe coded just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438). I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like.
This project was originally 90% vibe coded by Mr. Karpathy, just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438).

After branching off, I added PDF support and a chat pane that behaves much like the Gemini pane you can enable inside the Chrome browser. Currently, it only supports automatically sending the selected text as a query to a remote LLM of your choice. There are some interesting ideas to explore from here, such as how to do prompt and context management to make the LLMs more effective.

## Usage

The project uses [uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then:
The project uses [uv](https://docs.astral.sh/uv/). All books (source files and processed data) live in the `books/` subdirectory. For example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345), then:

```bash
uv run reader3.py ~/Downloads/dracula.epub
```

This creates the directory `books/dracula_data`, which registers the book to your local library.

Similarly, you can also import PDF files. Just run the same command on a `.pdf` file:

```bash
uv run reader3.py ~/Downloads/mydocument.pdf
```

### Testing

Run the integration test suite to verify the application:

```bash
uv run reader3.py dracula.epub
uv run pytest
```

This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server:
### Running the Server

We can then run the server:

```bash
uv run server.py
```

And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex.
To stop the server:

```bash
uv run stop_server.py
```

While the server is running, visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting their folder under `books/`. It's not supposed to be complicated or complex.

## License

MIT
MIT
104 changes: 104 additions & 0 deletions annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import json
import os
import uuid
from datetime import datetime, timezone
from typing import List, Literal, Optional

from pydantic import BaseModel, Field

# --- Data Models ---

class AnnotationTarget(BaseModel):
    """Where in a book an annotation is anchored.

    ``chapter_index`` is the only required field. Every other field is
    optional and defaults to ``None``; which ones are populated depends on
    the book format (EPUB vs. PDF), as grouped below.
    """

    chapter_index: int

    # EPUB anchors.
    # NOTE(review): `cfi` is presumably an EPUB CFI string and `quote` the
    # selected text -- confirm against the client code that fills them in.
    cfi: Optional[str] = Field(default=None)
    quote: Optional[str] = Field(default=None)

    # PDF anchors.
    # NOTE(review): `rect` is presumably a page-space rectangle; the
    # coordinate order is not established in this file -- confirm.
    page_num: Optional[int] = Field(default=None)
    rect: Optional[List[float]] = Field(default=None)

class ChatMessage(BaseModel):
    """One message of a chat thread stored inside an annotation."""

    # Free-form speaker label; no fixed set of values is enforced here.
    role: str
    # The message body.
    content: str

class AnnotationContent(BaseModel):
    """Payload of an annotation; every field is optional and defaults to ``None``."""

    # Markdown string for notes
    text: Optional[str] = Field(default=None)
    # Colour value, e.g. "#ffff00"
    color: Optional[str] = Field(default=None)
    # Messages belonging to the annotation's chat thread, when it has one
    chat_messages: Optional[List[ChatMessage]] = Field(default=None)

class Annotation(BaseModel):
    """A single annotation attached to a book.

    ``id`` and ``created_at`` are generated automatically when omitted;
    ``type``, ``target`` and ``content`` must be supplied by the caller.
    """

    # Random UUID4 rendered as a string.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Creation time in UTC as a naive ISO-8601 string.
    # datetime.utcnow() is deprecated since Python 3.12, so take an aware
    # UTC "now" and drop the tzinfo: the resulting string has exactly the
    # same format as before, keeping already-stored annotations consistent.
    created_at: str = Field(
        default_factory=lambda: datetime.now(timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
    )

    # Discriminates what kind of annotation this is.
    type: Literal['highlight', 'note', 'chat_thread']
    # Where in the book the annotation is anchored.
    target: AnnotationTarget
    # The annotation payload (note text, colour, chat messages).
    content: AnnotationContent

# --- Storage Logic ---

def _get_annotations_path(books_dir: str, book_id: str) -> str:
    """Return the path of the ``annotations.json`` file inside a book's folder.

    The book's folder is ``books_dir`` joined with ``book_id``.
    """
    book_folder = os.path.join(books_dir, book_id)
    return os.path.join(book_folder, "annotations.json")

def load_annotations(books_dir: str, book_id: str) -> List[Annotation]:
    """Read every stored annotation for a book.

    Returns an empty list when the book has no annotations file. This is a
    best-effort read: any error while reading or parsing the file is printed
    and also results in an empty list rather than an exception.
    """
    annotations_file = _get_annotations_path(books_dir, book_id)

    if os.path.exists(annotations_file):
        try:
            with open(annotations_file, "r", encoding="utf-8") as handle:
                records = json.load(handle)
            parsed = []
            for record in records:
                parsed.append(Annotation(**record))
            return parsed
        except Exception as e:
            print(f"Error loading annotations for {book_id}: {e}")

    # Either the file does not exist or it could not be read/parsed.
    return []

def _write_annotations(path: str, annotations: List[Annotation]) -> None:
    """Atomically replace the JSON file at ``path`` with ``annotations``.

    The list is serialized in memory first and written to a sibling temporary
    file, which is then moved over the target with ``os.replace``. A failure
    at any point therefore leaves the previous file untouched instead of a
    truncated one (which the loader would report as "no annotations").

    Raises:
        Exception: whatever serialization or the file system raises.
    """
    # model_dump(mode='json') handles datetime/uuid serialization
    payload = json.dumps([a.model_dump(mode='json') for a in annotations], indent=2)

    tmp_path = path + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_path, path)
    except Exception:
        # Do not leave a stray temp file behind after a failed write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


def save_annotation_to_disk(books_dir: str, book_id: str, new_annotation: Annotation) -> None:
    """Append ``new_annotation`` to the book's annotations file.

    The book folder is created when missing.

    NOTE(review): ``load_annotations`` returns ``[]`` for an unreadable file,
    so saving after the file has been corrupted replaces it with a list that
    holds only the new annotation.

    Raises:
        Exception: re-raised after logging when the file cannot be written.
    """
    annotations = load_annotations(books_dir, book_id)
    annotations.append(new_annotation)

    path = _get_annotations_path(books_dir, book_id)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    try:
        _write_annotations(path, annotations)
    except Exception as e:
        print(f"Error saving annotation for {book_id}: {e}")
        raise


def delete_annotation_from_disk(books_dir: str, book_id: str, annotation_id: str) -> bool:
    """Remove the annotation whose id equals ``annotation_id``.

    Returns:
        ``True`` when an annotation was removed and the file rewritten,
        ``False`` when no annotation has that id (nothing is written).

    Raises:
        Exception: re-raised after logging when the file cannot be written.
    """
    annotations = load_annotations(books_dir, book_id)
    remaining = [a for a in annotations if a.id != annotation_id]

    if len(remaining) == len(annotations):
        return False  # ID not found

    path = _get_annotations_path(books_dir, book_id)
    try:
        _write_annotations(path, remaining)
    except Exception as e:
        print(f"Error deleting annotation for {book_id}: {e}")
        raise
    return True


def update_annotation_in_disk(books_dir: str, book_id: str, updated_annotation: Annotation) -> bool:
    """Replace the stored annotation that shares ``updated_annotation.id``.

    Only the first annotation with a matching id is replaced.

    Returns:
        ``True`` when a match was replaced and the file rewritten,
        ``False`` when no stored annotation has that id (nothing is written).

    Raises:
        Exception: re-raised after logging when the file cannot be written.
    """
    annotations = load_annotations(books_dir, book_id)

    for index, existing in enumerate(annotations):
        if existing.id == updated_annotation.id:
            annotations[index] = updated_annotation
            break
    else:
        # Loop finished without a match.
        return False

    path = _get_annotations_path(books_dir, book_id)
    try:
        _write_annotations(path, annotations)
    except Exception as e:
        print(f"Error updating annotation for {book_id}: {e}")
        raise
    return True
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@ description = "Simple EPUB reader web app"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"anthropic>=0.79.0",
"beautifulsoup4>=4.14.2",
"ebooklib>=0.20",
"fastapi>=0.121.2",
"httpx>=0.28.1",
"jinja2>=3.1.6",
"openai>=2.20.0",
"pydantic>=2.12.4",
"pymupdf>=1.27.1",
"uvicorn>=0.38.0",
]
Binary file modified reader3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
63 changes: 60 additions & 3 deletions reader3.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup, Comment
import fitz # PyMuPDF

# --- Data structures ---

Expand Down Expand Up @@ -283,6 +284,52 @@ def process_epub(epub_path: str, output_dir: str) -> Book:
return final_book


def process_pdf(pdf_path: str, output_dir: str) -> Book:
    """
    Extracts metadata from a PDF and returns a Book object.
    Does NOT convert pages to HTML/Images.

    The PDF is copied into ``output_dir`` as ``original.pdf`` so the book
    folder is self-contained. ``output_dir`` is wiped and recreated first.

    Args:
        pdf_path: Path to the source PDF file.
        output_dir: Directory that will hold the processed book.

    Returns:
        A Book with metadata filled in, an empty spine/toc/images, and
        ``source_file`` set to ``"original.pdf"``.
    """
    print(f"Processing PDF {pdf_path}...")

    # 1. Load PDF only long enough to read its metadata; the context manager
    #    guarantees the file handle is released even if reading fails.
    # PyMuPDF metadata keys: format, title, author, subject, keywords,
    # creator, producer, creationDate, modDate
    with fitz.open(pdf_path) as doc:
        meta = dict(doc.metadata or {})

    # 2. Extract Metadata
    # Fall back to the file name without its extension; splitext strips only
    # the trailing extension and works for any letter case (e.g. ".PDF").
    fallback_title = os.path.splitext(os.path.basename(pdf_path))[0]
    author = meta.get('author')
    keywords = meta.get('keywords') or ''

    metadata = BookMetadata(
        title=meta.get('title') or fallback_title,
        language="en",  # default
        authors=[author] if author else [],
        description=meta.get('subject'),
        publisher=meta.get('producer'),
        date=meta.get('creationDate'),
        identifiers=[],
        # Trim whitespace around each keyword and drop empty entries.
        subjects=[kw.strip() for kw in keywords.split(',') if kw.strip()],
    )

    # 3. Create Output Directory
    # NOTE(review): if pdf_path points inside output_dir, the rmtree below
    # deletes the source before it is copied -- callers must avoid that.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Copy the PDF to the output directory so it's self-contained
    shutil.copy2(pdf_path, os.path.join(output_dir, "original.pdf"))

    final_book = Book(
        metadata=metadata,
        spine=[],
        toc=[],
        images={},
        source_file="original.pdf",  # We will look for this in server
        processed_at=datetime.now().isoformat()
    )

    return final_book


def save_to_pickle(book: Book, output_dir: str):
p_path = os.path.join(output_dir, 'book.pkl')
with open(p_path, 'wb') as f:
Expand All @@ -301,9 +348,19 @@ def save_to_pickle(book: Book, output_dir: str):

epub_file = sys.argv[1]
assert os.path.exists(epub_file), "File not found."
out_dir = os.path.splitext(epub_file)[0] + "_data"

book_obj = process_epub(epub_file, out_dir)

# Sanitize the directory name
original_base_name = os.path.splitext(os.path.basename(epub_file))[0]
safe_base_name = "".join([c for c in original_base_name if c.isalnum() or c in '._-']).strip()
base_name = safe_base_name + "_data"

out_dir = os.path.join("books", base_name)
os.makedirs("books", exist_ok=True)

if epub_file.lower().endswith('.pdf'):
book_obj = process_pdf(epub_file, out_dir)
else:
book_obj = process_epub(epub_file, out_dir)
save_to_pickle(book_obj, out_dir)
print("\n--- Summary ---")
print(f"Title: {book_obj.metadata.title}")
Expand Down
Loading