# Daily Content Update #105
# NOTE: the code viewer this file was copied from flagged it as containing hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scheduled workflow with two jobs:
#   update-content - fetches new content, processes it, updates the FAISS
#                    index, uploads a run summary and deploys to Railway.
#   cleanup        - prunes old log files and delta packages afterwards.
name: Daily Content Update

on:
  schedule:
    # Run at 5:00 AM UTC daily
    - cron: '0 5 * * *'
  # Allow manual trigger
  workflow_dispatch:

jobs:
  update-content:
    runs-on: ubuntu-latest
    # The "Notify on failure" step opens an issue with the workflow token,
    # which requires issues: write. Declaring it explicitly keeps the
    # notification working when the repository default token is read-only.
    # NOTE(review): contents is read-only here because no step in this file
    # pushes commits; raise it to write if one of the Python tools does.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.11'

      - name: Cache Python dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # Cache key changes whenever any requirements.txt changes; the
          # restore key falls back to the latest pip cache for this OS.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run incremental update
        run: |
          # Create necessary directories
          mkdir -p data/raw/forum data/raw/news data/raw/delta logs
          # Run forum update tracker
          python src/tools/forum_update_tracker.py update
          # Run incremental update for recent content
          python src/tools/incremental_update.py

      - name: Process new content with Docling
        run: |
          # Process delta content if any (directory exists and is non-empty)
          if [ -d "data/raw/delta" ] && [ "$(ls -A data/raw/delta)" ]; then
            echo "Processing delta content..."
            python src/rag/document_processor.py --delta
          else
            echo "No new content to process"
          fi

      - name: Update FAISS index
        run: |
          # Update vector index only when processed content is present
          if [ -d "data/processed" ] && [ "$(ls -A data/processed)" ]; then
            echo "Updating FAISS index..."
            python src/rag/vector_store.py --update
          else
            echo "No processed content to index"
          fi

      - name: Create update summary
        run: |
          # Generate update report
          python src/tools/forum_download_summary.py > update_summary.txt
          # Add timestamp
          echo "Update completed at: $(date)" >> update_summary.txt

      - name: Upload artifacts
        # v3 of this action has been retired by GitHub; v4 accepts the same
        # name/path inputs used here.
        uses: actions/upload-artifact@v4
        with:
          name: update-artifacts
          path: |
            update_summary.txt
            logs/
            data/update_metadata.json

      - name: Deploy to Railway
        if: success()
        env:
          RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }}
        run: |
          # Install Railway CLI
          npm install -g @railway/cli
          # Deploy updated FAISS index
          railway up --environment production

      - name: Notify on failure
        if: failure()
        uses: actions/github-script@v7
        with:
          # Await the API call so a rejected request surfaces as a failure of
          # this step instead of an unhandled promise.
          script: |
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: 'Daily Update Failed',
              body: `The daily content update workflow failed.\n\nRun: ${context.runId}\nDate: ${new Date().toISOString()}`
            })

  cleanup:
    runs-on: ubuntu-latest
    needs: update-content
    # Run even when update-content failed.
    if: always()
    # NOTE(review): this job runs on its own runner against a fresh checkout
    # and nothing here commits or pushes, so the deletions below do not appear
    # to persist anywhere, and freshly checked-out files are unlikely to
    # satisfy the -mtime age filters. Confirm whether this cleanup is meant to
    # run inside update-content (or against persistent storage) instead.
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Clean old logs
        run: |
          # Remove logs older than 30 days; "|| true" keeps the step green
          # when logs/ does not exist.
          find logs/ -name "*.log" -mtime +30 -delete || true

      - name: Clean old delta packages
        run: |
          # Remove delta package directories older than 7 days; "|| true"
          # keeps the step green when the directory does not exist.
          find data/raw/delta/ -name "delta_*" -mtime +7 -type d -exec rm -rf {} + || true