From 1e07af5fbc0f6e33958b6dee0c62ac5550b5ecca Mon Sep 17 00:00:00 2001 From: JC Li <> Date: Wed, 11 Feb 2026 12:23:39 -0800 Subject: [PATCH 1/7] feat: Add PDF support, AI chat sidebar, and enhanced reader UI - Add PDF processing via PyMuPDF with metadata extraction - Add PDF.js-based viewer with HiDPI rendering, zoom, fit-width/fit-page, and ToC sidebar populated from PDF outline - Add AI chat sidebar component (OpenAI/Anthropic/custom provider support) with server-side proxy to avoid CORS issues - Redesign EPUB reader with dark toolbar header, zoom controls, fit-width/fit-page, toggleable/resizable ToC sidebar, and proper content margins - Fix EPUB internal link navigation by intercepting content links and routing through spineMap lookup - Move book storage to books/ subdirectory for cleaner repo structure - Add server shutdown endpoint and stop_server.py utility - Add new dependencies: pymupdf, httpx, openai, anthropic - Update README with PDF usage instructions and new directory layout --- .gitignore | 5 +- README.md | 22 +- pyproject.toml | 4 + reader3.py | 58 ++- server.py | 113 ++++- stop_server.py | 18 + templates/components/chat_component.html | 471 ++++++++++++++++++ templates/library.html | 90 +++- templates/pdf_reader.html | 446 +++++++++++++++++ templates/reader.html | 585 +++++++++++++++++++---- uv.lock | 221 +++++++++ 11 files changed, 1911 insertions(+), 122 deletions(-) create mode 100644 stop_server.py create mode 100644 templates/components/chat_component.html create mode 100644 templates/pdf_reader.html diff --git a/.gitignore b/.gitignore index 9e1d25d..d6c3ab0 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,5 @@ wheels/ # Virtual environments .venv -# Custom -*_data/ -*.epub +# Books +books/ diff --git a/README.md b/README.md index 5d868d7..bb6e77c 100644 --- a/README.md +++ b/README.md @@ -8,19 +8,33 @@ This project was 90% vibe coded just to illustrate how one can very easily [read ## Usage -The project uses 
[uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then: +The project uses [uv](https://docs.astral.sh/uv/). All books (source files and processed data) live in the `books/` subdirectory. For example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) into `books/`, then: ```bash -uv run reader3.py dracula.epub +uv run reader3.py books/dracula.epub ``` -This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server: +This creates the directory `books/dracula_data`, which registers the book to your local library. + +### PDF Support + +You can also read PDF files. Just run the same command on a `.pdf` file: + +```bash +uv run reader3.py books/mydocument.pdf +``` + +This will register the PDF in the library. When you open it, the built-in PDF.js-based viewer is used (with zoom, fit-width/fit-page, and a table-of-contents sidebar). + +### Running the Server + +We can then run the server: ```bash uv run server.py ``` -And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex. +And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting their folder under `books/`. It's not supposed to be complicated or complex. 
## License diff --git a/pyproject.toml b/pyproject.toml index 31e6179..effa5ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,9 +5,13 @@ description = "Simple EPUB reader web app" readme = "README.md" requires-python = ">=3.10" dependencies = [ + "anthropic>=0.79.0", "beautifulsoup4>=4.14.2", "ebooklib>=0.20", "fastapi>=0.121.2", + "httpx>=0.28.1", "jinja2>=3.1.6", + "openai>=2.20.0", + "pymupdf>=1.27.1", "uvicorn>=0.38.0", ] diff --git a/reader3.py b/reader3.py index d0b9d3f..e569c7b 100644 --- a/reader3.py +++ b/reader3.py @@ -13,6 +13,7 @@ import ebooklib from ebooklib import epub from bs4 import BeautifulSoup, Comment +import fitz # PyMuPDF # --- Data structures --- @@ -283,6 +284,52 @@ def process_epub(epub_path: str, output_dir: str) -> Book: return final_book +def process_pdf(pdf_path: str, output_dir: str) -> Book: + """ + Extracts metadata from a PDF and returns a Book object. + Does NOT convert pages to HTML/Images. + """ + print(f"Processing PDF {pdf_path}...") + + # 1. Load PDF + doc = fitz.open(pdf_path) + + # 2. Extract Metadata + meta = doc.metadata + + # PyMuPDF metadata keys: format, title, author, subject, keywords, creator, producer, creationDate, modDate + metadata = BookMetadata( + title=meta.get('title') or os.path.basename(pdf_path).replace('.pdf', ''), + language="en", # default + authors=[meta.get('author')] if meta.get('author') else [], + description=meta.get('subject'), + publisher=meta.get('producer'), + date=meta.get('creationDate'), + identifiers=[], + subjects=meta.get('keywords', '').split(',') if meta.get('keywords') else [] + ) + + # 3. Create Output Directory + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir, exist_ok=True) + + # Save absolute path to original file in the book object? 
+ # Or better: Copy the PDF to the output directory so it's self-contained + shutil.copy2(pdf_path, os.path.join(output_dir, "original.pdf")) + + final_book = Book( + metadata=metadata, + spine=[], + toc=[], + images={}, + source_file="original.pdf", # We will look for this in server + processed_at=datetime.now().isoformat() + ) + + return final_book + + def save_to_pickle(book: Book, output_dir: str): p_path = os.path.join(output_dir, 'book.pkl') with open(p_path, 'wb') as f: @@ -301,9 +348,14 @@ def save_to_pickle(book: Book, output_dir: str): epub_file = sys.argv[1] assert os.path.exists(epub_file), "File not found." - out_dir = os.path.splitext(epub_file)[0] + "_data" - - book_obj = process_epub(epub_file, out_dir) + base_name = os.path.splitext(os.path.basename(epub_file))[0] + "_data" + out_dir = os.path.join("books", base_name) + os.makedirs("books", exist_ok=True) + + if epub_file.lower().endswith('.pdf'): + book_obj = process_pdf(epub_file, out_dir) + else: + book_obj = process_epub(epub_file, out_dir) save_to_pickle(book_obj, out_dir) print("\n--- Summary ---") print(f"Title: {book_obj.metadata.title}") diff --git a/server.py b/server.py index 9c870dc..61631e8 100644 --- a/server.py +++ b/server.py @@ -3,18 +3,23 @@ from functools import lru_cache from typing import Optional -from fastapi import FastAPI, Request, HTTPException -from fastapi.responses import HTMLResponse, FileResponse +from fastapi import FastAPI, Request, HTTPException, Body +from fastapi.responses import HTMLResponse, FileResponse, JSONResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates +import httpx +import os +import signal +import sys from reader3 import Book, BookMetadata, ChapterContent, TOCEntry app = FastAPI() +app.mount("/books", StaticFiles(directory="books"), name="books") templates = Jinja2Templates(directory="templates") # Where are the book folders located? -BOOKS_DIR = "." 
+BOOKS_DIR = "books" @lru_cache(maxsize=10) def load_book_cached(folder_name: str) -> Optional[Book]: @@ -42,7 +47,8 @@ async def library_view(request: Request): # Scan directory for folders ending in '_data' that have a book.pkl if os.path.exists(BOOKS_DIR): for item in os.listdir(BOOKS_DIR): - if item.endswith("_data") and os.path.isdir(item): + item_path = os.path.join(BOOKS_DIR, item) + if item.endswith("_data") and os.path.isdir(item_path): # Try to load it to get the title book = load_book_cached(item) if book: @@ -56,9 +62,23 @@ async def library_view(request: Request): return templates.TemplateResponse("library.html", {"request": request, "books": books}) @app.get("/read/{book_id}", response_class=HTMLResponse) -async def redirect_to_first_chapter(book_id: str): - """Helper to just go to chapter 0.""" - return await read_chapter(book_id=book_id, chapter_index=0) +async def redirect_to_first_chapter(request: Request, book_id: str): + """Helper to just go to chapter 0 OR open PDF.""" + book = load_book_cached(book_id) + if not book: + raise HTTPException(status_code=404, detail="Book not found") + + # Check if it is a PDF + # We stored "original.pdf" as source_file for PDFs + if book.source_file.endswith('.pdf'): + return templates.TemplateResponse("pdf_reader.html", { + "request": request, + "book": book, + "book_id": book_id, + "pdf_url": f"/books/{book_id}/original.pdf" + }) + + return await read_chapter(request, book_id=book_id, chapter_index=0) @app.get("/read/{book_id}/{chapter_index}", response_class=HTMLResponse) async def read_chapter(request: Request, book_id: str, chapter_index: int): @@ -104,6 +124,85 @@ async def serve_image(book_id: str, image_name: str): return FileResponse(img_path) +@app.post("/api/chat") +async def chat_proxy(payload: dict = Body(...)): + """ + Proxies chat requests to LLM providers to avoid CORS issues. 
+ Payload: { + "provider": "openai" | "anthropic" | "custom", + "apiKey": "sk-...", + "baseUrl": "https://...", + "model": "gpt-4o", + "messages": [...] + } + """ + provider = payload.get("provider") + api_key = payload.get("apiKey") + base_url = payload.get("baseUrl") + model = payload.get("model") + messages = payload.get("messages") + + if not provider or not messages: + raise HTTPException(status_code=400, detail="Missing provider or messages") + + try: + async with httpx.AsyncClient(timeout=60.0) as client: + if provider == "openai": + url = "https://api.openai.com/v1/chat/completions" + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + data = {"model": model or "gpt-4o", "messages": messages} + resp = await client.post(url, json=data, headers=headers) + resp.raise_for_status() + return resp.json() + + elif provider == "anthropic": + url = "https://api.anthropic.com/v1/messages" + headers = { + "x-api-key": api_key, + "anthropic-version": "2023-06-01", + "Content-Type": "application/json" + } + data = {"model": model or "claude-3-5-sonnet-20240620", "messages": messages, "max_tokens": 1024} + resp = await client.post(url, json=data, headers=headers) + resp.raise_for_status() + return resp.json() + + elif provider == "custom": + # For custom, we expect a full URL in baseUrl (e.g. http://localhost:1234/v1/chat/completions) + # Or we can construct it if strictly OpenAI compatible. 
+ # Let's assume user provides full URL for maximum flexibility + if not base_url: + raise HTTPException(status_code=400, detail="Custom provider requires baseUrl") + + heading = {} + if api_key: + heading["Authorization"] = f"Bearer {api_key}" + + # Assume OpenAI format for custom + data = {"model": model, "messages": messages} if model else {"messages": messages} + resp = await client.post(base_url, json=data, headers=heading) + resp.raise_for_status() + return resp.json() + + else: + raise HTTPException(status_code=400, detail="Unknown provider") + + except httpx.HTTPStatusError as e: + print(f"Upstream error: {e.response.text}") + raise HTTPException(status_code=e.response.status_code, detail=f"Upstream error: {e.response.text}") + except Exception as e: + print(f"Proxy error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/shutdown") +def shutdown_server(): + """Shuts down the server.""" + print("Shutting down server...") + # Schedule kill + os.kill(os.getpid(), signal.SIGTERM) + return {"message": "Server shutting down"} + if __name__ == "__main__": import uvicorn print("Starting server at http://127.0.0.1:8123") diff --git a/stop_server.py b/stop_server.py new file mode 100644 index 0000000..9ac7b13 --- /dev/null +++ b/stop_server.py @@ -0,0 +1,18 @@ +import httpx +import sys + +def stop_server(): + try: + # Try to call the shutdown endpoint + response = httpx.post("http://127.0.0.1:8123/shutdown", timeout=2.0) + if response.status_code == 200: + print("Shutdown signal sent successfully.") + else: + print(f"Server responded with status code: {response.status_code}") + except httpx.ConnectError: + print("Server is not currently running.") + except Exception as e: + print(f"An error occurred while trying to stop the server: {e}") + +if __name__ == "__main__": + stop_server() diff --git a/templates/components/chat_component.html b/templates/components/chat_component.html new file mode 100644 index 0000000..242a34a --- /dev/null +++ 
b/templates/components/chat_component.html @@ -0,0 +1,471 @@ + + + + + +
+
+
+ AI Assistant + +
+ +
+
+ Select text to ask about it, or just type a question. +
+
+ +
+
+
+ + +
+
+
+ + +
+ +
+ + + \ No newline at end of file diff --git a/templates/library.html b/templates/library.html index e7d094d..0def351 100644 --- a/templates/library.html +++ b/templates/library.html @@ -1,27 +1,79 @@ + My Library +

Library

{% if not books %} -

No processed books found. Run reader3.py on an epub first.

+

No processed books found. Run reader3.py on an epub first.

{% endif %}
@@ -32,10 +84,24 @@

Library

{{ book.author }}
{{ book.chapters }} sections
- Read Book + Read Book
{% endfor %} + - + + \ No newline at end of file diff --git a/templates/pdf_reader.html b/templates/pdf_reader.html new file mode 100644 index 0000000..339c626 --- /dev/null +++ b/templates/pdf_reader.html @@ -0,0 +1,446 @@ + + + + + + + {{ book.metadata.title }} + + + + + + + + +
+
+ ← Back + {{ book.metadata.title }} +
+ + +
+ + Page -- / -- + + + + + + + + +
+ +
+ +
+
+ +
+ + + + +
+
+
+ +
+
+
+
+ + + {% include "components/chat_component.html" %} +
+ + + + + + + \ No newline at end of file diff --git a/templates/reader.html b/templates/reader.html index c012edc..03e3ba1 100644 --- a/templates/reader.html +++ b/templates/reader.html @@ -1,154 +1,553 @@ + {{ book.metadata.title }} + - - @@ -274,10 +277,74 @@ } - - {% include "components/chat_component.html" %} + + {% include "components/right_sidebar.html" %} + + + + + diff --git a/templates/reader.html b/templates/reader.html index 84461f3..512cb11 100644 --- a/templates/reader.html +++ b/templates/reader.html @@ -328,7 +328,8 @@
+ style="font-size: 0.9em; border: 1px solid #555;" onclick="window.RightSidebar.toggle()">💬 + Chat
@@ -414,12 +415,54 @@ } - - {% include "components/chat_component.html" %} + + {% include "components/right_sidebar.html" %} + + + + +