# Captured from the GitHub Actions web UI: "Daily Content Update", run #105.
# Workflow file for this run:

# Daily Content Update
#
# Runs every day at 05:00 UTC (and on demand) to:
#   1. run the forum/news update scripts,
#   2. process any delta content and refresh the FAISS index,
#   3. publish a summary + logs as a build artifact,
#   4. deploy to Railway's production environment,
#   5. open a GitHub issue if any step of the update job fails.
# A follow-up job then attempts to prune old logs and delta packages.
name: Daily Content Update

on:
  schedule:
    # Run at 5:00 AM UTC daily
    - cron: '0 5 * * *'
  workflow_dispatch:  # Allow manual trigger

# Least-privilege default for every job; jobs that need more opt in below.
# NOTE(review): nothing visible in this workflow pushes commits. If one of the
# Python tools does push, raise `contents` to `write`.
permissions:
  contents: read

jobs:
  update-content:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Required by the "Notify on failure" step, which opens an issue.
      issues: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML never reads the version as a float.
          python-version: '3.11'

      - name: Cache Python dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run incremental update
        run: |
          # Create necessary directories
          mkdir -p data/raw/forum data/raw/news data/raw/delta logs
          # Run forum update tracker
          python src/tools/forum_update_tracker.py update
          # Run incremental update for recent content
          python src/tools/incremental_update.py

      - name: Process new content with Docling
        run: |
          # Process delta content if any (directory exists and is non-empty)
          if [ -d "data/raw/delta" ] && [ "$(ls -A data/raw/delta)" ]; then
            echo "Processing delta content..."
            python src/rag/document_processor.py --delta
          else
            echo "No new content to process"
          fi

      - name: Update FAISS index
        run: |
          # Update vector index with new content (only if anything was processed)
          if [ -d "data/processed" ] && [ "$(ls -A data/processed)" ]; then
            echo "Updating FAISS index..."
            python src/rag/vector_store.py --update
          else
            echo "No processed content to index"
          fi

      - name: Create update summary
        run: |
          # Generate update report
          python src/tools/forum_download_summary.py > update_summary.txt
          # Add timestamp
          echo "Update completed at: $(date)" >> update_summary.txt

      - name: Upload artifacts
        # Run even when an earlier step failed: the logs are most useful then.
        # Paths that do not exist only produce a warning.
        if: always()
        # v3 of this action is deprecated and no longer runs on GitHub-hosted
        # runners; v4 accepts the same `name` / `path` inputs.
        uses: actions/upload-artifact@v4
        with:
          name: update-artifacts
          path: |
            update_summary.txt
            logs/
            data/update_metadata.json

      - name: Deploy to Railway
        # Explicit because the always() upload step above must not be mistaken
        # for a gate: deploy only when every previous step succeeded.
        if: success()
        env:
          RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }}
        run: |
          # Install Railway CLI
          npm install -g @railway/cli
          # Deploy updated FAISS index
          railway up --environment production

      - name: Notify on failure
        if: failure()
        uses: actions/github-script@v7
        with:
          # `await` so that an API error fails this step visibly instead of
          # surfacing as an unhandled promise rejection.
          script: |
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: 'Daily Update Failed',
              body: `The daily content update workflow failed.\n\nRun: ${context.runId}\nDate: ${new Date().toISOString()}`
            })

  cleanup:
    runs-on: ubuntu-latest
    needs: update-content
    # Run regardless of whether the update job succeeded.
    if: always()
    # NOTE(review): this job works on a fresh checkout on a new runner and has
    # no commit/push step, so deletions are discarded when the job ends. File
    # mtimes in a fresh checkout are typically the checkout time, so the
    # -mtime filters below will usually match nothing. Confirm whether this
    # cleanup should instead run where the data actually persists.
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Clean old logs
        run: |
          # Remove logs older than 30 days (best effort: never fail the job)
          find logs/ -name "*.log" -mtime +30 -delete || true

      - name: Clean old delta packages
        run: |
          # Remove delta packages older than 7 days (best effort)
          find data/raw/delta/ -name "delta_*" -mtime +7 -type d -exec rm -rf {} + || true