# Workflow: Scrape Octopus Energy Free Electricity
# (Recovered from the GitHub "Workflow file for this run" page, run #2606;
# web-page chrome removed and YAML indentation restored.)
name: Scrape Octopus Energy Free Electricity

# NOTE: generic YAML 1.1 parsers read the bare key `on` as boolean true;
# GitHub's loader handles it, so suppress yamllint's `truthy` rule here.
on:
  schedule:
    # Run every hour at minute 0
    - cron: '0 * * * *'
  workflow_dispatch: # Allow manual trigger
  push:
    branches: [main, master]
    paths:
      - '**.py'
      - 'requirements.txt'
      - 'config.yaml.example'
      - 'gh-pages-src/**'
jobs:
  scrape-and-deploy:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository so the scraper sources and gh-pages-src/ are available
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -r requirements.txt
- name: Prepare history data
run: |
mkdir -p output
# Priority 1: Use committed seed data if available
if [ -f gh-pages-data/history.json ]; then
cp gh-pages-data/history.json output/history.json
echo "Using seed history.json from gh-pages-data/"
# Priority 2: Download existing from gh-pages branch
elif git fetch origin gh-pages:gh-pages 2>/dev/null && git show gh-pages:history.json > output/history.json 2>/dev/null; then
echo "Downloaded existing history.json from gh-pages branch"
else
echo "No existing history.json found, will create new one"
fi
- name: Run scraper
run: |
python3 main.py --single-run
- name: Prepare GitHub Pages content
run: |
mkdir -p gh-pages-deploy
# Copy generated iCal file
cp output/octopus_free_electricity.ics gh-pages-deploy/
# Copy HTML webpages
cp gh-pages-src/index.html gh-pages-deploy/
cp gh-pages-src/history.html gh-pages-deploy/
# Copy thumbnail images if they exist
if [ -f gh-pages-src/home.thumbnail.jpeg ]; then
cp gh-pages-src/home.thumbnail.jpeg gh-pages-deploy/
fi
if [ -f gh-pages-src/history.thumbnail.jpeg ]; then
cp gh-pages-src/history.thumbnail.jpeg gh-pages-deploy/
fi
# Copy JSON data files (create empty ones if they don't exist)
if [ -f output/history.json ]; then
cp output/history.json gh-pages-deploy/
else
echo '{"sessions": [], "last_updated": null}' > gh-pages-deploy/history.json
fi
if [ -f output/upcoming_sessions.json ]; then
cp output/upcoming_sessions.json gh-pages-deploy/
else
echo '{"upcoming_sessions": [], "last_updated": null}' > gh-pages-deploy/upcoming_sessions.json
fi
# Create last-updated.txt for the "Last updated" display on the homepage
python3 -c "
import json
from pathlib import Path
out = Path('gh-pages-deploy/last-updated.txt')
for f in ['gh-pages-deploy/upcoming_sessions.json', 'gh-pages-deploy/history.json']:
if Path(f).exists():
try:
data = json.load(open(f))
ts = data.get('last_updated')
if ts:
from datetime import datetime
dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
out.write_text(f\"Last updated: {dt.strftime('%d %b %Y at %H:%M')} GMT\")
break
except: pass
if not out.exists():
out.write_text('Updates automatically every hour')
"
- name: Preserve timestamps when nothing changed
run: |
# Fetch gh-pages for comparison
if ! git fetch origin gh-pages:gh-pages 2>/dev/null || ! git rev-parse --verify gh-pages >/dev/null 2>&1; then
echo "gh-pages branch not available, skipping timestamp preservation"
exit 0
fi
# Preserve last_updated in JSON files when content unchanged (avoids unnecessary deployments)
python3 << 'PY'
import json
import subprocess
from pathlib import Path
def get_gh_pages_file(name):
try:
out = subprocess.run(['git', 'show', f'gh-pages:{name}'], capture_output=True, text=True, cwd='.')
return json.loads(out.stdout) if out.returncode == 0 else None
except:
return None
for fname, key in [('upcoming_sessions.json', 'upcoming_sessions'), ('history.json', 'sessions')]:
path = Path(f'gh-pages-deploy/{fname}')
if not path.exists():
continue
old = get_gh_pages_file(fname)
if not old:
continue
new = json.load(open(path))
old_data = json.dumps(old.get(key, []), sort_keys=True)
new_data = json.dumps(new.get(key, []), sort_keys=True)
if old_data == new_data:
new['last_updated'] = old.get('last_updated') or new.get('last_updated')
path.write_text(json.dumps(new, indent=2))
print(f"Preserved last_updated in {fname} (content unchanged)")
# Regenerate last-updated.txt from possibly-preserved JSON
out = Path('gh-pages-deploy/last-updated.txt')
for f in ['gh-pages-deploy/upcoming_sessions.json', 'gh-pages-deploy/history.json']:
if Path(f).exists():
try:
data = json.load(open(f))
ts = data.get('last_updated')
if ts:
from datetime import datetime
dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
out.write_text(f"Last updated: {dt.strftime('%d %b %Y at %H:%M')} GMT")
break
except: pass
PY
- name: Check for changes
id: check-changes
run: |
# Fetch gh-pages branch to compare (if it exists)
if git fetch origin gh-pages:gh-pages 2>/dev/null && git rev-parse --verify gh-pages >/dev/null 2>&1; then
echo "gh-pages branch exists, comparing files..."
CHANGED=false
for file in gh-pages-deploy/*; do
[ -f "$file" ] || continue
filename=$(basename "$file")
if git show gh-pages:"$filename" > /tmp/old_"$filename" 2>/dev/null; then
if ! diff -q "$file" /tmp/old_"$filename" > /dev/null 2>&1; then
CHANGED=true
echo "File changed: $filename"
break
fi
else
# File doesn't exist in gh-pages, so it's new
CHANGED=true
echo "New file: $filename"
break
fi
done
else
echo "gh-pages branch doesn't exist yet, will deploy"
CHANGED=true
fi
if [ "$CHANGED" = "false" ]; then
echo "✅ No changes detected, skipping deployment to avoid unnecessary Pages build"
else
echo "📝 Changes detected, will deploy"
fi
echo "changed=$CHANGED" >> $GITHUB_OUTPUT
- name: Deploy to GitHub Pages
if: steps.check-changes.outputs.changed == 'true'
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./gh-pages-deploy
publish_branch: gh-pages
force_orphan: false
user_name: 'github-actions[bot]'
user_email: 'github-actions[bot]@users.noreply.github.com'
commit_message: 'Update calendar and data'