Scrape Octopus Energy Free Electricity #2606
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrapes Octopus Energy free-electricity sessions and publishes the results.
name: Scrape Octopus Energy Free Electricity

on:
  # Allow manual trigger from the Actions tab.
  workflow_dispatch:

  # Run every hour at minute 0.
  schedule:
    - cron: "0 * * * *"

  # Run on pushes to main/master that touch Python code, dependencies,
  # the example config, or the GitHub Pages sources.
  push:
    branches:
      - main
      - master
    paths:
      - "**.py"
      - "requirements.txt"
      - "config.yaml.example"
      - "gh-pages-src/**"
jobs:
  # Runs the scraper, assembles the static site in gh-pages-deploy/, and
  # publishes it to the gh-pages branch only when a generated file differs
  # from what is already published.
  scrape-and-deploy:
    runs-on: ubuntu-latest
    # The deploy step pushes to gh-pages with GITHUB_TOKEN, which requires
    # write access to repository contents; no other scope is used.
    permissions:
      contents: write
    # Serialize runs so a scheduled run and a push-triggered run cannot race
    # each other when pushing to gh-pages.
    concurrency:
      group: ${{ github.workflow }}-deploy
      cancel-in-progress: false
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt

      - name: Prepare history data
        run: |
          mkdir -p output
          # Priority 1: Use committed seed data if available
          if [ -f gh-pages-data/history.json ]; then
            cp gh-pages-data/history.json output/history.json
            echo "Using seed history.json from gh-pages-data/"
          # Priority 2: Download existing from gh-pages branch.
          # Write to a temp file first: redirecting straight into
          # output/history.json would leave an empty file behind whenever
          # `git show` fails, and later steps would treat it as real data.
          elif git fetch origin gh-pages:gh-pages 2>/dev/null && git show gh-pages:history.json > output/history.json.tmp 2>/dev/null; then
            mv output/history.json.tmp output/history.json
            echo "Downloaded existing history.json from gh-pages branch"
          else
            rm -f output/history.json.tmp
            echo "No existing history.json found, will create new one"
          fi

      - name: Run scraper
        run: |
          python3 main.py --single-run

      - name: Prepare GitHub Pages content
        run: |
          mkdir -p gh-pages-deploy
          # Copy generated iCal file
          cp output/octopus_free_electricity.ics gh-pages-deploy/
          # Copy HTML webpages
          cp gh-pages-src/index.html gh-pages-deploy/
          cp gh-pages-src/history.html gh-pages-deploy/
          # Copy thumbnail images if they exist
          if [ -f gh-pages-src/home.thumbnail.jpeg ]; then
            cp gh-pages-src/home.thumbnail.jpeg gh-pages-deploy/
          fi
          if [ -f gh-pages-src/history.thumbnail.jpeg ]; then
            cp gh-pages-src/history.thumbnail.jpeg gh-pages-deploy/
          fi
          # Copy JSON data files (create empty ones if they don't exist)
          if [ -f output/history.json ]; then
            cp output/history.json gh-pages-deploy/
          else
            echo '{"sessions": [], "last_updated": null}' > gh-pages-deploy/history.json
          fi
          if [ -f output/upcoming_sessions.json ]; then
            cp output/upcoming_sessions.json gh-pages-deploy/
          else
            echo '{"upcoming_sessions": [], "last_updated": null}' > gh-pages-deploy/upcoming_sessions.json
          fi
          # Create last-updated.txt for the "Last updated" display on the homepage.
          # A quoted heredoc is used so the Python source needs no shell escaping.
          python3 << 'PY'
          import json
          from datetime import datetime, timezone
          from pathlib import Path

          out = Path('gh-pages-deploy/last-updated.txt')
          # The first file that carries a usable last_updated value wins.
          for f in ['gh-pages-deploy/upcoming_sessions.json', 'gh-pages-deploy/history.json']:
              path = Path(f)
              if not path.exists():
                  continue
              try:
                  data = json.loads(path.read_text())
                  ts = data.get('last_updated')
                  if ts:
                      dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
                      # The text is labelled GMT, so normalize aware timestamps to UTC.
                      if dt.tzinfo is not None:
                          dt = dt.astimezone(timezone.utc)
                      out.write_text(f"Last updated: {dt.strftime('%d %b %Y at %H:%M')} GMT")
                      break
              except (OSError, ValueError, TypeError, AttributeError) as exc:
                  # Best effort: an unreadable or malformed file falls through to the next one.
                  print(f"Skipping {f}: {exc}")
          if not out.exists():
              out.write_text('Updates automatically every hour')
          PY

      - name: Preserve timestamps when nothing changed
        run: |
          # Fetch gh-pages for comparison
          if ! git fetch origin gh-pages:gh-pages 2>/dev/null || ! git rev-parse --verify gh-pages >/dev/null 2>&1; then
            echo "gh-pages branch not available, skipping timestamp preservation"
            exit 0
          fi
          # Preserve last_updated in JSON files when content unchanged (avoids unnecessary deployments)
          python3 << 'PY'
          import json
          import subprocess
          from datetime import datetime, timezone
          from pathlib import Path

          def get_gh_pages_file(name):
              """Return the parsed JSON of `name` from the gh-pages branch, or None."""
              try:
                  out = subprocess.run(['git', 'show', f'gh-pages:{name}'], capture_output=True, text=True, cwd='.')
                  return json.loads(out.stdout) if out.returncode == 0 else None
              except (OSError, ValueError):
                  return None

          for fname, key in [('upcoming_sessions.json', 'upcoming_sessions'), ('history.json', 'sessions')]:
              path = Path(f'gh-pages-deploy/{fname}')
              if not path.exists():
                  continue
              old = get_gh_pages_file(fname)
              # Nothing published yet (or not a JSON object): nothing to preserve.
              if not old or not isinstance(old, dict):
                  continue
              new = json.loads(path.read_text())
              # Compare only the payload under `key`, ignoring last_updated itself.
              old_data = json.dumps(old.get(key, []), sort_keys=True)
              new_data = json.dumps(new.get(key, []), sort_keys=True)
              if old_data == new_data:
                  new['last_updated'] = old.get('last_updated') or new.get('last_updated')
                  path.write_text(json.dumps(new, indent=2))
                  print(f"Preserved last_updated in {fname} (content unchanged)")

          # Regenerate last-updated.txt from possibly-preserved JSON
          out = Path('gh-pages-deploy/last-updated.txt')
          for f in ['gh-pages-deploy/upcoming_sessions.json', 'gh-pages-deploy/history.json']:
              path = Path(f)
              if not path.exists():
                  continue
              try:
                  data = json.loads(path.read_text())
                  ts = data.get('last_updated')
                  if ts:
                      dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
                      # The text is labelled GMT, so normalize aware timestamps to UTC.
                      if dt.tzinfo is not None:
                          dt = dt.astimezone(timezone.utc)
                      out.write_text(f"Last updated: {dt.strftime('%d %b %Y at %H:%M')} GMT")
                      break
              except (OSError, ValueError, TypeError, AttributeError) as exc:
                  # Best effort: keep the existing last-updated.txt if this file is unusable.
                  print(f"Skipping {f}: {exc}")
          PY

      - name: Check for changes
        id: check-changes
        run: |
          # Fetch gh-pages branch to compare (if it exists)
          if git fetch origin gh-pages:gh-pages 2>/dev/null && git rev-parse --verify gh-pages >/dev/null 2>&1; then
            echo "gh-pages branch exists, comparing files..."
            CHANGED=false
            for file in gh-pages-deploy/*; do
              [ -f "$file" ] || continue
              filename=$(basename "$file")
              if git show gh-pages:"$filename" > /tmp/old_"$filename" 2>/dev/null; then
                if ! diff -q "$file" /tmp/old_"$filename" > /dev/null 2>&1; then
                  CHANGED=true
                  echo "File changed: $filename"
                  break
                fi
              else
                # File doesn't exist in gh-pages, so it's new
                CHANGED=true
                echo "New file: $filename"
                break
              fi
            done
          else
            echo "gh-pages branch doesn't exist yet, will deploy"
            CHANGED=true
          fi
          if [ "$CHANGED" = "false" ]; then
            echo "✅ No changes detected, skipping deployment to avoid unnecessary Pages build"
          else
            echo "📝 Changes detected, will deploy"
          fi
          # Expose the result to the deploy step's `if:` condition.
          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Deploy to GitHub Pages
        if: steps.check-changes.outputs.changed == 'true'
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./gh-pages-deploy
          publish_branch: gh-pages
          force_orphan: false
          user_name: 'github-actions[bot]'
          user_email: 'github-actions[bot]@users.noreply.github.com'
          commit_message: 'Update calendar and data'