From a78ac4d3270fb079879dbdbf66b2fd2633c9485f Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Tue, 16 Dec 2025 18:25:19 +0100 Subject: [PATCH 1/2] Add development submodules and override Docker Compose configuration - Introduced `.gitmodules` to manage submodules for `rag_api`, `librechat`, and `agents`. - Added `docker-compose.librechat.override.yml` for local builds using submodules. - Updated `docker-compose.librechat.yml` to expose the RAG API port and include new environment variables for the simple reranker. - Enhanced `env.example` with configurations for the simple reranker model. - Updated `librechat.yaml` to switch reranker type to "simple". - Added README.md in the `dev` directory for setup instructions and usage of submodules. - Initialized submodules for local development. --- .gitmodules | 12 +++++ dev/README.md | 66 +++++++++++++++++++++++++++ dev/agents | 1 + dev/librechat | 1 + dev/rag_api | 1 + docker-compose.librechat.override.yml | 20 ++++++++ docker-compose.librechat.yml | 4 ++ env.example | 15 ++++-- librechat.yaml | 4 +- 9 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 .gitmodules create mode 100644 dev/README.md create mode 160000 dev/agents create mode 160000 dev/librechat create mode 160000 dev/rag_api create mode 100644 docker-compose.librechat.override.yml diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..fb64978c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,12 @@ +[submodule "dev/rag_api"] + path = dev/rag_api + url = https://github.com/kalaspuffar/rag_api.git + branch = reranker +[submodule "dev/librechat"] + path = dev/librechat + url = https://github.com/kalaspuffar/LibreChat.git + branch = new/feature/simple_reranker +[submodule "dev/agents"] + path = dev/agents + url = https://github.com/kalaspuffar/agents.git + branch = simple_reranker \ No newline at end of file diff --git a/dev/README.md b/dev/README.md new file mode 100644 index 00000000..d681c2ba --- /dev/null +++ b/dev/README.md 
@@ -0,0 +1,66 @@ +# Development Submodules + +Git submodules for local development and testing PRs. + +## Setup + +### 1. Initialize Submodules + +```bash +git submodule update --init --remote +``` + +This checks out the latest commit of each branch configured in `.gitmodules` (in detached HEAD state), not the commit pinned in this repository. + +### 1.1. Build Agents Package + +Since `agents` is an npm package used by LibreChat, build it before starting: + +```bash +cd dev/agents +npm install +npm run build +cd ../.. +``` + +### 2. Build and Start + +To use local builds from submodules, include the override file: + +```bash +docker compose -f docker-compose.librechat.yml -f docker-compose.librechat.override.yml build +docker compose -f docker-compose.librechat.yml -f docker-compose.librechat.override.yml up -d +``` + +To use published images, omit the override file: + +```bash +docker compose -f docker-compose.librechat.yml build +docker compose -f docker-compose.librechat.yml up -d +``` + +## Testing the Reranker + +```bash +curl -s http://localhost:8000/rerank \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $JWT_TOKEN" \ + -d '{ + "query": "I love you", + "docs": ["I hate you", "I really like you"], + "k": 5 + }' +``` + +## Update Submodules + +```bash +git submodule update --remote +``` + +## Switch Between Local and Published Images + +Since the override file is not automatically loaded, simply include or omit it in your commands: + +- **Local builds**: Include `-f docker-compose.librechat.override.yml` +- **Published images**: Omit the override file flag diff --git a/dev/agents b/dev/agents new file mode 160000 index 00000000..42d90e18 --- /dev/null +++ b/dev/agents @@ -0,0 +1 @@ +Subproject commit 42d90e185ccf58fb689409f074dc13dccb6dd352 diff --git a/dev/librechat b/dev/librechat new file mode 160000 index 00000000..2b8578c9 --- /dev/null +++ b/dev/librechat @@ -0,0 +1 @@ +Subproject commit 2b8578c94a46a43fd61fccf9f4d980becd8b009b diff --git a/dev/rag_api b/dev/rag_api new file mode 160000 index 00000000..56379030 --- 
/dev/null +++ b/dev/rag_api @@ -0,0 +1 @@ +Subproject commit 56379030ddd448d9a7a4ca326ff4739452e2b716 diff --git a/docker-compose.librechat.override.yml b/docker-compose.librechat.override.yml new file mode 100644 index 00000000..7859fe25 --- /dev/null +++ b/docker-compose.librechat.override.yml @@ -0,0 +1,20 @@ +# Use this file to override the docker-compose.librechat.yml file to enable local builds from dev/ submodules +# This file is NOT loaded automatically; pass it explicitly with an additional -f flag (see dev/README.md) + +services: + api: + image: librechat:local + build: + context: ./dev/librechat + dockerfile: Dockerfile + target: node + volumes: + - ./dev/agents/dist:/app/node_modules/@librechat/agents/dist + + rag_api: + image: rag_api:local + build: + context: ./dev/rag_api + dockerfile: Dockerfile + + diff --git a/docker-compose.librechat.yml b/docker-compose.librechat.yml index 7b2238fc..072d3416 100644 --- a/docker-compose.librechat.yml +++ b/docker-compose.librechat.yml @@ -79,6 +79,8 @@ services: rag_api: container_name: rag_api image: ghcr.io/danny-avila/librechat-rag-api-dev-lite:latest + ports: + - "${LIBRECHAT_RAG_PORT:-8000}:8000" env_file: - .env environment: @@ -87,6 +89,8 @@ services: - RAG_OPENAI_API_KEY=${RAG_OPENAI_API_KEY:-${OPENROUTER_API_KEY:-}} - RAG_OPENAI_BASEURL=${RAG_OPENAI_BASEURL:-${OPENROUTER_BASE_URL:-}} - OPENAI_API_KEY=${OPENAI_API_KEY:-${OPENROUTER_API_KEY:-}} + - SIMPLE_RERANKER_MODEL_NAME=${SIMPLE_RERANKER_MODEL_NAME:-mixedbread-ai/mxbai-rerank-large-v2} + - SIMPLE_RERANKER_MODEL_TYPE=${SIMPLE_RERANKER_MODEL_TYPE:-cross-encoder} restart: always depends_on: - vectordb diff --git a/env.example b/env.example index 29cddd74..dfd7e997 100644 --- a/env.example +++ b/env.example @@ -38,13 +38,22 @@ LIBRECHAT_MEILI_MASTER_KEY=change-me-meili-master-key LIBRECHAT_RAG_PORT=8000 LIBRECHAT_RAG_API_URL=http://rag_api:8000 - # RAG API - Embeddings (uses OpenRouter by default via docker-compose) # Uncomment and set if you want to use different credentials than OpenRouter 
#RAG_OPENAI_API_KEY=sk-... #RAG_OPENAI_BASEURL=https://openrouter.ai/api/v1 #OPENAI_API_KEY=sk-... +# RAG API - Simple Reranker (local models, not via OpenRouter) +# Reranking models run locally, not through OpenRouter API +# See https://github.com/AnswerDotAI/rerankers for available models +SIMPLE_RERANKER_MODEL_NAME=mixedbread-ai/mxbai-rerank-large-v2 +SIMPLE_RERANKER_MODEL_TYPE=cross-encoder +# Alternative options: +#SIMPLE_RERANKER_MODEL_NAME=ms-marco-MiniLM-L-12-v2 +#SIMPLE_RERANKER_MODEL_NAME=flashrank +#SIMPLE_RERANKER_MODEL_TYPE=colbert + # LibreChat - Vector Database (PostgreSQL) LIBRECHAT_VECTORDB_DB=mydatabase LIBRECHAT_VECTORDB_USER=myuser @@ -58,7 +67,7 @@ GID=1000 SEARXNG_SECRET_KEY=change-me-searxng-secret SEARXNG_BASE_URL=http://localhost:8080/ SEARXNG_INSTANCE_URL=http://searxng:8080 -SEARXNG_API_KEY= +SEARXNG_API_KEY=local-selfhost # Firecrawl - Self-hosted stack FIRECRAWL_PORT=3003 @@ -77,7 +86,7 @@ FIRECRAWL_API_KEY=local-selfhost FIRECRAWL_VERSION=v2 FIRECRAWL_NUM_WORKERS=8 FIRECRAWL_BULL_AUTH_KEY=my-secret-key -FIRECRAWL_LLM_MODEL=gpt-4o +FIRECRAWL_LLM_MODEL=mistralai/ministral-8b-2512 # Jina Reranker (temporary - will be replaced with RAG API reranker) JINA_API_KEY= diff --git a/librechat.yaml b/librechat.yaml index f61c4e7f..bcbef299 100644 --- a/librechat.yaml +++ b/librechat.yaml @@ -85,8 +85,6 @@ webSearch: firecrawlApiKey: "${FIRECRAWL_API_KEY}" firecrawlApiUrl: "${FIRECRAWL_API_URL}" firecrawlVersion: "${FIRECRAWL_VERSION}" - jinaApiKey: "${JINA_API_KEY}" - jinaApiUrl: "${JINA_API_URL}" - rerankerType: "jina" + rerankerType: "simple" scraperTimeout: 7500 safeSearch: 1 From f58e30918d96111a497a42362f1e0e2124eb616a Mon Sep 17 00:00:00 2001 From: Pascal Garber Date: Tue, 16 Dec 2025 22:33:00 +0100 Subject: [PATCH 2/2] Enhance env.example with detailed configurations for simple reranker models - Added multiple options for reranker models, including performance notes for CPU and GPU usage. 
- Clarified the reranker invocation process during web searches. - Provided recommendations for model selection based on resource availability. --- env.example | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/env.example b/env.example index dfd7e997..a0067c5d 100644 --- a/env.example +++ b/env.example @@ -47,12 +47,39 @@ LIBRECHAT_RAG_API_URL=http://rag_api:8000 # RAG API - Simple Reranker (local models, not via OpenRouter) # Reranking models run locally, not through OpenRouter API # See https://github.com/AnswerDotAI/rerankers for available models +# +# IMPORTANT: The reranker is called ONCE PER SCRAPED URL during web search. +# If 5 URLs are scraped, the reranker runs 5 times. Choose models accordingly. +# +# Option 1 (high quality but CPU-intensive without GPU): SIMPLE_RERANKER_MODEL_NAME=mixedbread-ai/mxbai-rerank-large-v2 SIMPLE_RERANKER_MODEL_TYPE=cross-encoder -# Alternative options: +# +# RECOMMENDED FOR CPU-ONLY (no GPU): FlashRank models are ONNX-optimized +# and 5-10x faster on CPU with lower memory footprint. Good multilingual support. 
+# Option 2 - FlashRank (fastest, recommended for CPU): #SIMPLE_RERANKER_MODEL_NAME=ms-marco-MiniLM-L-12-v2 +#SIMPLE_RERANKER_MODEL_TYPE=flashrank +# +# Option 3 - FlashRank default (auto-selects best model): #SIMPLE_RERANKER_MODEL_NAME=flashrank -#SIMPLE_RERANKER_MODEL_TYPE=colbert +#SIMPLE_RERANKER_MODEL_TYPE=flashrank +# +# Option 4 - Smaller cross-encoder (if you prefer cross-encoder architecture): +#SIMPLE_RERANKER_MODEL_NAME=cross-encoder/ms-marco-MiniLM-L-6-v2 +#SIMPLE_RERANKER_MODEL_TYPE=cross-encoder +# Note: L-6-v2 has 22M parameters (very small), L-12-v2 has 33M (small) +# +# Option 5 - Multilingual cross-encoder (moderate size, good for German): +#SIMPLE_RERANKER_MODEL_NAME=BAAI/bge-reranker-base +#SIMPLE_RERANKER_MODEL_TYPE=cross-encoder +# +# PERFORMANCE NOTES: +# - FlashRank models: Best for CPU-only, ONNX optimized, fast inference +# - Cross-encoder models: Higher quality but slower on CPU, better with GPU +# - Large models (like mxbai-rerank-large-v2): Require significant CPU/GPU resources +# - For German/multilingual: Not every model above is multilingual (the ms-marco-MiniLM models are trained on English data); check the model card for language coverage +# - Model is loaded once at startup and reused for all requests # LibreChat - Vector Database (PostgreSQL) LIBRECHAT_VECTORDB_DB=mydatabase