From 32d56175e8e3ff0822f579b6a8b44f685223ae89 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Tue, 6 Jan 2026 17:09:33 -0500 Subject: [PATCH 1/7] fix: fixed soc timezone issue and added endpoint to delete events --- app/api/events.py | 62 ++++++++++++++ app/models/models.py | 8 +- scraper/README.md | 14 +++- scraper/helpers/timezone.py | 26 ++++++ scraper/transforms/soc_events.py | 4 + tests/scraper_events/test_timezone.py | 113 ++++++++++++++++++++++++++ 6 files changed, 221 insertions(+), 6 deletions(-) create mode 100644 scraper/helpers/timezone.py create mode 100644 tests/scraper_events/test_timezone.py diff --git a/app/api/events.py b/app/api/events.py index 117ba4a..c1b93e9 100644 --- a/app/api/events.py +++ b/app/api/events.py @@ -481,6 +481,68 @@ def get_all_events(): print("❌ Exception:", traceback.format_exc()) return jsonify({"error": str(e)}), 500 +@events_bp.route("/batch_delete_events_by_params", methods=["DELETE"]) +def batch_delete_events_by_params(): + """ + Deletes Events AND all dependent rows: + - EventOccurrence + - RecurrenceRule (+ overrides, rdates, exdates) + - EventTag + - Academic / Career / Club + - UserSavedEvent + + Parameters can include semester, org_id, category_id, event_type, source_url. + Note that SOC events are identified by source_url: 'https://enr-apps.as.cmu.edu/open/SOC/SOCServlet/completeSchedule' + """ + db = g.db + try: + data = request.get_json() + semester = data.get("semester", None) + org_id = data.get("org_id", None) + category_id = data.get("category_id", None) + event_type = data.get("event_type", None) + source_url = data.get("source_url", None) + print(f"called batch_delete_events_by_params:\n semester={semester}, org_id={org_id}, category_id={category_id}, event_type={event_type}, source_url={source_url}") + + if not any([org_id, category_id, semester, event_type, source_url]): + return jsonify({"error": "At least one filter must be provided"}), 400 + + query = db.query(Event) + + if org_id: + query = query.filter(Event.org_id == org_id) + if category_id: + query = query.filter(Event.category_id == category_id) + if semester: + query = query.filter(Event.semester == semester) + if event_type: + query = query.filter(Event.event_type == event_type) + if source_url: + query = query.filter(Event.source_url == source_url) + + events = query.all() + + if not events: + return jsonify({"status": "no events matched"}), 200 + + deleted_ids = [e.id for e in events] + + for event in events: + db.delete(event) + + db.commit() + + return jsonify({ + "status": "ok", + "deleted_events": len(deleted_ids), + "event_ids": deleted_ids, + }), 200 + + except Exception as e: + import traceback + print("❌ Exception:", traceback.format_exc()) + return jsonify({"error": str(e)}), 500 + @events_bp.route("/", methods=["GET"]) def get_specific_events(event_id): print("🍎🍎🍎🍎", request.url) diff --git a/app/models/models.py b/app/models/models.py index 20cb190..f514a77 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -309,10 +309,10 @@ class Event(Base): category: Mapped['Category'] = relationship('Category', back_populates='events') org: Mapped['Organization'] = relationship('Organization', back_populates='events') - event_occurrences: Mapped[List['EventOccurrence']] = relationship('EventOccurrence', back_populates='event') - event_tags: Mapped[List['EventTag']] = relationship('EventTag', back_populates='event') - recurrence_rules: Mapped[List['RecurrenceRule']] = relationship('RecurrenceRule', back_populates='event') - user_saved_events: Mapped[List['UserSavedEvent']] = relationship('UserSavedEvent', back_populates='event') + event_occurrences: Mapped[List['EventOccurrence']] = relationship('EventOccurrence', back_populates='event', passive_deletes=True) + event_tags: Mapped[List['EventTag']] = relationship('EventTag', back_populates='event', passive_deletes=True) + recurrence_rules: Mapped[List['RecurrenceRule']] = relationship('RecurrenceRule', back_populates='event', passive_deletes=True) + user_saved_events: Mapped[List['UserSavedEvent']] = relationship('UserSavedEvent', back_populates='event', passive_deletes=True) def as_dict(self): return {c.name: getattr(self, c.name) for c in self.__table__.columns} diff --git a/scraper/README.md b/scraper/README.md index 7cf6940..744ac69 100644 --- a/scraper/README.md +++ b/scraper/README.md @@ -11,8 +11,18 @@ 2. If running the schedule of classes scraper, make sure to change the semester_label in `scraper = ScheduleOfClassesScraper(db, semester_label="Spring_26")`. - Acceptable formats include `Spring_xx`, `Fall_xx`, `Summer1_xx`, `Summer2_xx`. - Feel free to change the start and end dates of each semester in `scraper/helpers/semester.py` if needed. -3. Run: -`python -m scraper.scripts.export_soc` +3. Run `python -m scraper.scripts.export_soc` to scrape data and add events to the DB. + - first creates org and category for each SOC event if those don't exist, then add events, recurrence_rules, and calls an endpoint to generate event occurrences. + +* If need to delete, run this: +``` +curl -X DELETE http://localhost:5001/api/events/batch_delete_events_by_params \ + -H "Content-Type: application/json" \ + -d '{ + "semester": "Spring_26", + "source_url": "https://enr-apps.as.cmu.edu/open/SOC/SOCServlet/completeSchedule" + }' +``` ## Production Environment - created a cron job on Railway that calls `python -m scraper.scripts.export_soc` diff --git a/scraper/helpers/timezone.py b/scraper/helpers/timezone.py new file mode 100644 index 0000000..81da273 --- /dev/null +++ b/scraper/helpers/timezone.py @@ -0,0 +1,26 @@ +# scraper/helpers/timezone.py +from zoneinfo import ZoneInfo + +DEFAULT_TZ = ZoneInfo("America/New_York") + +LOCATION_TZ_MAP = { + "doha, qatar": ZoneInfo("Asia/Qatar"), + "kigali, rwanda": ZoneInfo("Africa/Kigali"), + "lisbon, portugal": ZoneInfo("Europe/Lisbon"), + + "los angeles, california": ZoneInfo("America/Los_Angeles"), + "san jose, california": ZoneInfo("America/Los_Angeles"), + + "new york, new york": ZoneInfo("America/New_York"), + "pittsburgh, pennsylvania": ZoneInfo("America/New_York"), + "washington, district of columbia": ZoneInfo("America/New_York"), + "washington, district of columbia:dulles": ZoneInfo("America/New_York"), +} + +def timezone_from_location(location: str | None): + if not location: + return DEFAULT_TZ + + key = location.strip().lower() + + return LOCATION_TZ_MAP.get(key, DEFAULT_TZ) diff --git a/scraper/transforms/soc_events.py b/scraper/transforms/soc_events.py index 1e3a3d5..f5abd77 100644 --- a/scraper/transforms/soc_events.py +++ b/scraper/transforms/soc_events.py @@ -1,5 +1,6 @@ from scraper.helpers.recurrence import parse_soc_time, build_rrule_from_parts from scraper.helpers.event import event_identity, group_soc_rows +from scraper.helpers.timezone import timezone_from_location def build_events_and_rrules(soc_rows, org_id_by_key, category_id_by_org): from collections import defaultdict @@ -24,14 +25,17 @@ def build_events_and_rrules(soc_rows, org_id_by_key, category_id_by_org): org_id = org_id_by_key[(course_num, semester)] category_id = category_id_by_org[org_id] + tz = timezone_from_location(location) start_dt = datetime.datetime.combine( soc0.sem_start, parse_soc_time(time_start), + tzinfo=tz, ) end_dt = datetime.datetime.combine( soc0.sem_start, parse_soc_time(time_end), + tzinfo=tz, ) title = f"{course_num} {lecture_section}" diff --git a/tests/scraper_events/test_timezone.py b/tests/scraper_events/test_timezone.py new file mode 100644 index 0000000..827b408 --- /dev/null +++ b/tests/scraper_events/test_timezone.py @@ -0,0 +1,113 @@ +import pytest +from zoneinfo import ZoneInfo +from datetime import datetime + +from scraper.helpers.timezone import timezone_from_location, DEFAULT_TZ +from scraper.transforms.soc_events import build_events_and_rrules +from scraper.models import ScheduleOfClasses + + +@pytest.mark.parametrize( + "location,expected", + [ + ("Pittsburgh, Pennsylvania", "America/New_York"), + ("New York, New York", "America/New_York"), + ("Washington, District of Columbia", "America/New_York"), + ("Washington, District of Columbia:Dulles", "America/New_York"), + ("Los Angeles, California", "America/Los_Angeles"), + ("San Jose, California", "America/Los_Angeles"), + ("Doha, Qatar", "Asia/Qatar"), + ("Kigali, Rwanda", "Africa/Kigali"), + ("Lisbon, Portugal", "Europe/Lisbon"), + ], +) +def test_timezone_from_location_known(location, expected): + tz = timezone_from_location(location) + assert isinstance(tz, ZoneInfo) + assert tz.key == expected + + +def test_timezone_from_location_unknown_fallback(): + tz = timezone_from_location("Some Random Place") + assert tz == DEFAULT_TZ + + +def test_timezone_from_location_none_fallback(): + tz = timezone_from_location(None) + assert tz == DEFAULT_TZ + +# Integration test to ensure build_events_and_rrules creates timezone-aware datetimes +SEM_START = datetime(2026, 1, 12) +SEM_END = datetime(2026, 5, 5) +def make_soc(location="Pittsburgh, Pennsylvania"): + return ScheduleOfClasses( + id=1, + course_num="15112", + course_name="Foundations of Programming", + lecture_section="A", + lecture_days="MWF", + lecture_time_start="09:00AM", + lecture_time_end="09:50AM", + location=location, + semester="Spring_26", + sem_start=SEM_START, + sem_end=SEM_END, + ) + + +def test_build_events_timezone_aware(): + soc = make_soc() + org_id_by_key = {("15112", "Spring_26"): 1} + category_id_by_org = {1: 10} + + events, rrules = build_events_and_rrules( + [soc], + org_id_by_key, + category_id_by_org, + ) + + event = events[0] + + assert event["start_datetime"].tzinfo is not None + assert event["end_datetime"].tzinfo is not None + assert event["start_datetime"].tzinfo.key == "America/New_York" + +def test_dst_transition_new_york(): + soc = make_soc() + org_id_by_key = {("15112", "Spring_26"): 1} + category_id_by_org = {1: 10} + + events, _ = build_events_and_rrules( + [soc], + org_id_by_key, + category_id_by_org, + ) + + start = events[0]["start_datetime"] + + # January 12, 2026 should be EST (UTC-5) + assert start.utcoffset().total_seconds() == -5 * 3600 + assert start.isoformat().endswith("-05:00") + expected = datetime( + 2026, 1, 12, 9, 0, + tzinfo=ZoneInfo("America/New_York") + ) + assert start == expected + + + # Now test a date in daylight saving time + soc.sem_start = datetime(2026, 3, 30) # After + events, _ = build_events_and_rrules( + [soc], + org_id_by_key, + category_id_by_org, + ) + start = events[0]["start_datetime"] + # March 30, 2026 should be EDT (UTC-4) + assert start.utcoffset().total_seconds() == -4 * 3600 + assert start.isoformat().endswith("-04:00") + expected = datetime( + 2026, 3, 30, 9, 0, + tzinfo=ZoneInfo("America/New_York") + ) + assert start == expected \ No newline at end of file From c0d76cc2d312f04a88b757391dd2b328075552a0 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Thu, 8 Jan 2026 22:19:30 -0500 Subject: [PATCH 2/7] fix: fixed timezone issue in soc scscraper --- app/api/events.py | 115 ++++++++++++++++----- app/models/event.py | 1 + app/models/event_occurrence.py | 96 ++++++++++++++--- app/models/recurrence_override.py | 1 - app/models/recurrence_rule.py | 4 +- app/services/ical.py | 6 +- app/utils/date.py | 13 +++ scraper/README.md | 7 +- scraper/helpers/recurrence.py | 17 ++- scraper/persistence/supabase_org_course.py | 6 +- scraper/persistence/supabase_recurrence.py | 1 - scraper/transforms/soc_events.py | 2 + 12 files changed, 217 insertions(+), 52 deletions(-) diff --git a/app/api/events.py b/app/api/events.py index 72f12d6..f7a116b 100644 --- a/app/api/events.py +++ b/app/api/events.py @@ -10,10 +10,10 @@ from app.models.recurrence_rule import add_recurrence_rule from app.models.event_occurrence import populate_event_occurrences, regenerate_event_occurrences_by_event_ids, save_event_occurrence from app.models.category import category_to_dict, get_category_by_id -from app.models.models import CalendarSource, Event, RecurrenceRule, UserSavedEvent, Organization, EventOccurrence, EventTag, Category, Tag +from app.models.models import Academic, CalendarSource, Career, Club, Event, RecurrenceRule, UserSavedEvent, Organization, EventOccurrence, EventTag, Category, Tag, RecurrenceExdate, RecurrenceRdate, EventOverride, RecurrenceOverride import pprint from datetime import datetime, timezone -from sqlalchemy import cast, Date, or_ +from sqlalchemy import cast, Date, or_, delete, select from app.services.ical import import_ical_feed_using_helpers from app.errors.ical import ICalFetchError @@ -353,6 +353,8 @@ def regenerate_occurrences_by_events(): regenerated, skipped = regenerate_event_occurrences_by_event_ids(db, event_ids) + print("Before commit, occurrences count:", db.query(EventOccurrence).count()) + db.commit() return jsonify({ @@ -505,45 +507,110 @@ def batch_delete_events_by_params(): db = g.db try: data = request.get_json() - semester = data.get("semester", None) - org_id = data.get("org_id", None) - category_id = data.get("category_id", None) - event_type = data.get("event_type", None) - source_url = data.get("source_url", None) - print(f"called batch_delete_events_by_params:\n semester={semester}, org_id={org_id}, category_id={category_id}, event_type={event_type}, source_url={source_url}") - if not any([org_id, category_id, semester, event_type, source_url]): + semester = data.get("semester") + org_id = data.get("org_id") + category_id = data.get("category_id") + event_type = data.get("event_type") + source_url = data.get("source_url") + + if not any([semester, org_id, category_id, event_type, source_url]): return jsonify({"error": "At least one filter must be provided"}), 400 - query = db.query(Event) + # -------------------------------------------------- + # 1. Build event ID subquery + # -------------------------------------------------- + event_ids = select(Event.id) + if semester: + event_ids = event_ids.where(Event.semester == semester) if org_id: - query = query.filter(Event.org_id == org_id) + event_ids = event_ids.where(Event.org_id == org_id) if category_id: - query = query.filter(Event.category_id == category_id) - if semester: - query = query.filter(Event.semester == semester) + event_ids = event_ids.where(Event.category_id == category_id) if event_type: - query = query.filter(Event.event_type == event_type) + event_ids = event_ids.where(Event.event_type == event_type) if source_url: - query = query.filter(Event.source_url == source_url) + event_ids = event_ids.where(Event.source_url == source_url) - events = query.all() - - if not events: + # Materialize once (for counts + reuse) + event_id_list = db.execute(event_ids).scalars().all() + if not event_id_list: return jsonify({"status": "no events matched"}), 200 - deleted_ids = [e.id for e in events] + event_id_subq = select(Event.id).where(Event.id.in_(event_id_list)) - for event in events: - db.delete(event) + # -------------------------------------------------- + # 2. Delete Event-level children + # -------------------------------------------------- + print(f"Deleting dependent rows for {len(event_id_list)} events") + db.execute( + delete(EventOccurrence).where(EventOccurrence.event_id.in_(event_id_subq)) + ) + db.execute( + delete(EventTag).where(EventTag.event_id.in_(event_id_subq)) + ) + db.execute( + delete(UserSavedEvent).where(UserSavedEvent.event_id.in_(event_id_subq)) + ) + db.execute( + delete(Academic).where(Academic.event_id.in_(event_id_subq)) + ) + db.execute( + delete(Career).where(Career.event_id.in_(event_id_subq)) + ) + db.execute( + delete(Club).where(Club.event_id.in_(event_id_subq)) + ) + # -------------------------------------------------- + # 3. Handle recurrence hierarchy + # -------------------------------------------------- + rrule_ids = select(RecurrenceRule.id).where( + RecurrenceRule.event_id.in_(event_id_subq) + ) + db.execute( + delete(RecurrenceExdate).where( + RecurrenceExdate.rrule_id.in_(rrule_ids) + ) + ) + + db.execute( + delete(RecurrenceRdate).where( + RecurrenceRdate.rrule_id.in_(rrule_ids) + ) + ) + + db.execute( + delete(EventOverride).where( + EventOverride.rrule_id.in_(rrule_ids) + ) + ) + + db.execute( + delete(RecurrenceOverride).where( + RecurrenceOverride.rrule_id.in_(rrule_ids) + ) + ) + + db.execute( + delete(RecurrenceRule).where( + RecurrenceRule.id.in_(rrule_ids) + ) + ) + + # -------------------------------------------------- + # 4. Delete events + # -------------------------------------------------- + db.execute( + delete(Event).where(Event.id.in_(event_id_subq)) + ) db.commit() return jsonify({ "status": "ok", - "deleted_events": len(deleted_ids), - "event_ids": deleted_ids, + "deleted_events": len(event_id_list), + "event_ids": event_id_list, }), 200 except Exception as e: diff --git a/app/models/event.py b/app/models/event.py index 7d8e963..d2c4d9c 100644 --- a/app/models/event.py +++ b/app/models/event.py @@ -50,6 +50,7 @@ def save_event(db, org_id: int, category_id: int, title: str, start_datetime: st db.flush() # Allocate event.id without committing return event + def get_event_by_id(db, event_id: int): """ Retrieve an event by its ID. diff --git a/app/models/event_occurrence.py b/app/models/event_occurrence.py index a2bd9d4..c14cf26 100644 --- a/app/models/event_occurrence.py +++ b/app/models/event_occurrence.py @@ -8,8 +8,13 @@ from datetime import datetime, timedelta, timezone from typing import List, Dict, Tuple, Optional from copy import deepcopy -from app.utils.date import _ensure_aware, _parse_iso_aware +from app.utils.date import _ensure_aware, _parse_iso_aware, normalize_occurrence, normalize_set_to_tz +TRACE_EVENT_ID = 117815 + +def trace(event, *msg): + if event and event.id == TRACE_EVENT_ID: + print("🧭 TRACE:", *msg) def apply_overrides( occ_start: datetime, @@ -83,6 +88,16 @@ def apply_overrides( return start_dt, end_dt, title, desc, loc +def delete_event_occurrences_by_event_id(db, event_id: int): + """ + Delete all event occurrences for a given event ID. + + Args: + db: Database session. + event_id: ID of the event whose occurrences are to be deleted. + """ + db.query(EventOccurrence).filter_by(event_id=event_id).delete(synchronize_session=False) + db.flush() ### need to check type of event_saved_at, start_datetime, end_datetime before using them def save_event_occurrence(db, event_id: int, org_id: int, category_id: int, title: str, @@ -106,6 +121,10 @@ def save_event_occurrence(db, event_id: int, org_id: int, category_id: int, titl end_dt = _parse_iso_aware(end_datetime) if isinstance(end_datetime, str) else end_datetime saved_at = _parse_iso_aware(event_saved_at) if isinstance(event_saved_at, str) else event_saved_at + start_dt = start_dt.astimezone(timezone.utc) + end_dt = end_dt.astimezone(timezone.utc) + saved_at = saved_at.astimezone(timezone.utc) + event_occurrence = EventOccurrence( event_id=event_id, org_id=org_id, @@ -139,9 +158,21 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): Returns: A message indicating the number of occurrences populated. """ + if event.id == TRACE_EVENT_ID: + print("🧭 TRACE: populate_event_occurrences()") + # Defensive duration (end could be equal to start in some feeds) end_datetime = _parse_iso_aware(event.end_datetime) if event.end_datetime else None start_datetime = _parse_iso_aware(event.start_datetime) if event.start_datetime else None + event_tz = event.start_datetime.tzinfo + trace(event, "event_tz =", event_tz) + + trace(event, + "start_datetime =", event.start_datetime, + "end_datetime =", event.end_datetime, + "tz =", event.start_datetime.tzinfo + ) + duration = (end_datetime or start_datetime) - start_datetime if duration.total_seconds() < 0: duration = timedelta(0) @@ -159,11 +190,18 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): temp_rule.until = min(_ensure_aware(temp_rule.until), six_months_later) print("➑️ rule.start_datetime =", rule.start_datetime) - print("➑️ rule.until =", rule.until) - print("➑️ temp_rule.until =", temp_rule.until) + # print("➑️ rule.until =", rule.until) + # print("➑️ temp_rule.until =", temp_rule.until) + + trace(event, + "rule.start =", rule.start_datetime, + "rule.until =", rule.until, + "temp.until =", temp_rule.until + ) # Build an rrule iterator from temp_rule - rrule_iter = get_rrule_from_db_rule(temp_rule) + rrule_iter = list(get_rrule_from_db_rule(temp_rule)) + trace(event, "RRULE count =", len(rrule_iter)) # Pull EXDATE/RDATE/Overrides/RecurrenceOverrides from DB exdates = { @@ -177,7 +215,7 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): } overrides = { - _ensure_aware(o.recurrence_date): o + normalize_occurrence(_ensure_aware(o.recurrence_date), event_tz): o for o in db.query(EventOverride).filter_by(rrule_id=rule.id).all() } @@ -189,7 +227,7 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): try: ro_rrule = rrule_from_db_recurrence_override(ro) for ro_date in ro_rrule: - ro_date = _ensure_aware(ro_date) + ro_date = normalize_occurrence(_ensure_aware(ro_date), event_tz) # If multiple patterns match the same date, later ones win # (could add priority field later if needed) recurrence_override_dates[ro_date] = ro @@ -197,14 +235,21 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): print(f"⚠️ Failed to expand RecurrenceOverride {ro.id}: {e}") # Start fresh for this event's occurrences - db.query(EventOccurrence).filter_by(event_id=event.id).delete(synchronize_session=False) + deleted = db.query(EventOccurrence).filter_by(event_id=event.id).delete(synchronize_session=False) + trace(event, "Deleted existing occurrences:", deleted) count = 0 seen_starts = set() # to avoid dupes when RDATE == RRULE date # 1) Generate occurrences from RRULE, skipping EXDATE and applying overrides + exdates = normalize_set_to_tz(exdates, event_tz) + rdates = normalize_set_to_tz(rdates, event_tz) + for occ_start in rrule_iter: - occ_start = _ensure_aware(occ_start) + occ_start = normalize_occurrence(occ_start, event_tz) + + if event.id == TRACE_EVENT_ID and count < 3: + print("🧭 TRACE: occ_start =", occ_start) if occ_start in exdates: continue @@ -213,13 +258,16 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): occ_start, event, duration, overrides, recurrence_override_dates ) + start_dt_utc = start_dt.astimezone(timezone.utc) + end_dt_utc = end_dt.astimezone(timezone.utc) + db.add(EventOccurrence( event_id=event.id, org_id=event.org_id, category_id=event.category_id, title=title, - start_datetime=start_dt, - end_datetime=end_dt, + start_datetime=start_dt_utc, + end_datetime=end_dt_utc, event_saved_at=event.last_updated_at, recurrence="RECURRING", is_all_day=event.is_all_day, @@ -245,13 +293,16 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): if not rule.count and (start_dt > six_months_later): continue + start_dt_utc = start_dt.astimezone(timezone.utc) + end_dt_utc = end_dt.astimezone(timezone.utc) + db.add(EventOccurrence( event_id=event.id, org_id=event.org_id, category_id=event.category_id, title=title, - start_datetime=start_dt, - end_datetime=end_dt, + start_datetime= start_dt_utc, + end_datetime=end_dt_utc, event_saved_at=event.last_updated_at, recurrence="RECURRING", is_all_day=event.is_all_day, @@ -269,6 +320,10 @@ def populate_event_occurrences(db, event: Event, rule: RecurrenceRule): event.last_updated_at = now db.flush() + trace(event, + "Occurrences in session =", + db.query(EventOccurrence).filter_by(event_id=event.id).count() + ) return f"Populated {count} occurrences for event {event.id}" def regenerate_event_occurrences_by_event_ids(db, event_ids: List[int]) -> Dict[int, str]: @@ -286,20 +341,33 @@ def regenerate_event_occurrences_by_event_ids(db, event_ids: List[int]) -> Dict[ start = datetime.now(timezone.utc) for event_id in event_ids: event = db.query(Event).get(event_id) + + if event_id == TRACE_EVENT_ID: + print("🧭 TRACE: Found event", event_id) + if not event: skipped += 1 continue rule = db.query(RecurrenceRule).filter_by(event_id=event.id).first() + + if event_id == TRACE_EVENT_ID: + print("🧭 TRACE: Rule exists?", bool(rule)) + if not rule: skipped += 1 continue if rule.last_generated_at and rule.last_generated_at > event.last_updated_at: + if event_id == TRACE_EVENT_ID: + print("🧭 TRACE: SKIPPED due to timestamps", rule.last_generated_at, event.last_updated_at) continue - - populate_event_occurrences(db, event, rule) + try: + populate_event_occurrences(db, event, rule) + except Exception as e: + print("FAILED during populate:", e) + raise regenerated += 1 end = datetime.now(timezone.utc) # total minutes diff --git a/app/models/recurrence_override.py b/app/models/recurrence_override.py index 22e5fdd..fea4cb2 100644 --- a/app/models/recurrence_override.py +++ b/app/models/recurrence_override.py @@ -8,7 +8,6 @@ ) from typing import List, Optional, Union from dateutil.parser import parse as parse_datetime -from app.utils.date import _ensure_aware from app.models.recurrence_rule import parse_by_day_array diff --git a/app/models/recurrence_rule.py b/app/models/recurrence_rule.py index 6e4ae6c..851938f 100644 --- a/app/models/recurrence_rule.py +++ b/app/models/recurrence_rule.py @@ -9,7 +9,6 @@ ) from typing import List, Optional, Union from dateutil.parser import parse as parse_datetime -from app.utils.date import _ensure_aware def add_recurrence_rule(db, event_id: int, frequency: FrequencyType, interval: int, start_datetime: str, count: int = None, until: str = None, @@ -106,6 +105,9 @@ def get_rrule_from_db_rule(rule) -> rrule: Assumes `rule` has attributes: frequency, interval, start_datetime, count, until, by_day (List[str]), by_month (int or List[int]), by_month_day (int or List[int]). """ + assert rule.start_datetime.tzinfo is not None, \ + "RRULE start_datetime must be tz-aware" + freq_map = { 'DAILY': DAILY, 'WEEKLY': WEEKLY, diff --git a/app/services/ical.py b/app/services/ical.py index 35b0365..5d942ce 100644 --- a/app/services/ical.py +++ b/app/services/ical.py @@ -401,7 +401,7 @@ def _process_uid_group_with_helpers( for ex_date in ex.dts: db_session.add(RecurrenceExdate( rrule_id=rule.id, - exdate=_ensure_aware(ex_date.dt) + exdate=_ensure_aware(ex_date.dt).astimezone(timezone.utc) )) # ---- Safe RDATE normalization ---- @@ -424,7 +424,7 @@ def _process_uid_group_with_helpers( for rd in entry.dts: db_session.add(RecurrenceRdate( rrule_id=rule.id, - rdate=_ensure_aware(rd.dt) + rdate=_ensure_aware(rd.dt).astimezone(timezone.utc) )) db_session.flush() @@ -459,7 +459,7 @@ def _process_uid_group_with_helpers( db_session.delete(old_rule) db_session.flush() - if changed or not _has_occurrence(db_session, event.id, _ensure_aware(dtstart), _ensure_aware(dtend) if dtend else _ensure_aware(dtstart)): + if changed or not _has_occurrence(db_session, event.id, _ensure_aware(dtstart).astimezone(timezone.utc), _ensure_aware(dtend).astimezone(timezone.utc) if dtend else _ensure_aware(dtstart).astimezone(timezone.utc)): # Write a single occurrence via your helper event_saved_at = getattr(event, "last_updated_at", datetime.utcnow()) diff --git a/app/utils/date.py b/app/utils/date.py index 93223e7..41483f9 100644 --- a/app/utils/date.py +++ b/app/utils/date.py @@ -74,6 +74,19 @@ def decoded_dt_with_tz(component, key: str, default_tz: ZoneInfo = DEFAULT_TZ): return None +def normalize_occurrence(occ_start: datetime, event_tz): + if occ_start.tzinfo is None: + return occ_start.replace(tzinfo=event_tz) + return occ_start.astimezone(event_tz) + +def normalize_set_to_tz(dts, tz): + out = set() + for dt in dts: + if dt.tzinfo is None: + out.add(dt.replace(tzinfo=tz)) + else: + out.add(dt.astimezone(tz)) + return out def _ensure_aware(dt): if dt is None: diff --git a/scraper/README.md b/scraper/README.md index 744ac69..fa93cb0 100644 --- a/scraper/README.md +++ b/scraper/README.md @@ -17,11 +17,8 @@ * If need to delete, run this: ``` curl -X DELETE http://localhost:5001/api/events/batch_delete_events_by_params \ - -H "Content-Type: application/json" \ - -d '{ - "semester": "Spring_26", - "source_url": "https://enr-apps.as.cmu.edu/open/SOC/SOCServlet/completeSchedule" - }' +-H "Content-Type: application/json" \ +--data-raw '{"semester":"Spring_26","source_url":"https://enr-apps.as.cmu.edu/open/SOC/SOCServlet/completeSchedule"}' ``` ## Production Environment diff --git a/scraper/helpers/recurrence.py b/scraper/helpers/recurrence.py index 4ba3450..921aa2c 100644 --- a/scraper/helpers/recurrence.py +++ b/scraper/helpers/recurrence.py @@ -1,5 +1,6 @@ from datetime import datetime, time from typing import Optional +from zoneinfo import ZoneInfo from app.models import FrequencyType from scraper.models import ScheduleOfClasses @@ -27,6 +28,8 @@ def build_rrule_from_parts( lecture_days: str, sem_start, sem_end, + start_time: time, + tz: ZoneInfo, ) -> Optional[dict]: if not lecture_days: @@ -34,14 +37,24 @@ def build_rrule_from_parts( by_day = [SOC_DAY_MAP[d] for d in lecture_days] - until_dt = datetime.combine(sem_end, time.max) + dtstart = datetime.combine( + sem_start, + start_time, + tzinfo=tz, + ) + + until_dt = datetime.combine( + sem_end, + time(23, 59, 59), + tzinfo=tz, + ) - # Note that start date is not included here; it is handled elsewhere return { "frequency": FrequencyType.WEEKLY, "interval": 1, "by_day": by_day, "until": until_dt, + "start_datetime": dtstart, "count": None, "by_month": None, "by_month_day": None, diff --git a/scraper/persistence/supabase_org_course.py b/scraper/persistence/supabase_org_course.py index c4c1679..6020af8 100644 --- a/scraper/persistence/supabase_org_course.py +++ b/scraper/persistence/supabase_org_course.py @@ -15,7 +15,11 @@ def upsert_orgs(db, orgs: dict) -> dict: data.append(clean) # Upsert: insert or update on conflict - db.table("organizations").upsert(data, on_conflict="name").execute() + for batch in chunked(data, 200): + db.table("organizations").upsert( + batch, + on_conflict="name" + ).execute() # Fetch IDs back name_to_id = {} diff --git a/scraper/persistence/supabase_recurrence.py b/scraper/persistence/supabase_recurrence.py index a5bd4fe..fb7fc01 100644 --- a/scraper/persistence/supabase_recurrence.py +++ b/scraper/persistence/supabase_recurrence.py @@ -26,7 +26,6 @@ def replace_recurrence_rules(db, rrules, event_id_by_identity): row = { **r, "event_id": event_id, - "start_datetime": start_dt, } row.pop("_identity") diff --git a/scraper/transforms/soc_events.py b/scraper/transforms/soc_events.py index f5abd77..9edefab 100644 --- a/scraper/transforms/soc_events.py +++ b/scraper/transforms/soc_events.py @@ -75,6 +75,8 @@ def build_events_and_rrules(soc_rows, org_id_by_key, category_id_by_org): lecture_days="".join(sorted(all_days)), sem_start=soc0.sem_start, sem_end=soc0.sem_end, + start_time=parse_soc_time(time_start), + tz=tz, ) if rrule: From 495d0a7a989d6ad927d49cfe4f14ef8ee77972f7 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Fri, 9 Jan 2026 00:55:30 -0500 Subject: [PATCH 3/7] test: added tests for timezone in ical events --- app/models/event.py | 9 +- app/models/recurrence_rule.py | 7 +- app/services/ical.py | 73 +++++++++++- app/utils/date.py | 10 ++ tests/conftest.py | 1 + tests/factories/category_factory.py | 15 +++ tests/ical/test_calendar_timezone.py | 159 +++++++++++++++++++++++++++ 7 files changed, 264 insertions(+), 10 deletions(-) create mode 100644 tests/factories/category_factory.py create mode 100644 tests/ical/test_calendar_timezone.py diff --git a/app/models/event.py b/app/models/event.py index d2c4d9c..ab72feb 100644 --- a/app/models/event.py +++ b/app/models/event.py @@ -5,7 +5,7 @@ from datetime import datetime, timedelta, timezone from typing import Dict, List, Optional import requests -from app.utils.date import infer_semester_from_datetime +from app.utils.date import ensure_aware_datetime, infer_semester_from_datetime from app.models.models import Event, RecurrenceRule, EventOccurrence, RecurrenceRdate, RecurrenceExdate, EventOverride ### need to check type of start_datetime, end_datetime before using them @@ -29,6 +29,9 @@ def save_event(db, org_id: int, category_id: int, title: str, start_datetime: st Returns: The created Event object. """ + start_dt = ensure_aware_datetime(start_datetime) + end_dt = ensure_aware_datetime(end_datetime) + if semester is None: semester = infer_semester_from_datetime(start_datetime) @@ -37,8 +40,8 @@ def save_event(db, org_id: int, category_id: int, title: str, start_datetime: st category_id=category_id, title=title, description=description, - start_datetime=start_datetime, - end_datetime=end_datetime, + start_datetime=start_dt, + end_datetime=end_dt, is_all_day=is_all_day, location=location, semester=semester, diff --git a/app/models/recurrence_rule.py b/app/models/recurrence_rule.py index 851938f..c39902a 100644 --- a/app/models/recurrence_rule.py +++ b/app/models/recurrence_rule.py @@ -10,6 +10,8 @@ from typing import List, Optional, Union from dateutil.parser import parse as parse_datetime +from app.utils.date import ensure_aware_datetime + def add_recurrence_rule(db, event_id: int, frequency: FrequencyType, interval: int, start_datetime: str, count: int = None, until: str = None, by_month: int = None, by_month_day: int = None, by_day: List[str] = None): @@ -20,6 +22,9 @@ def add_recurrence_rule(db, event_id: int, frequency: FrequencyType, when rendering the rule, it will use the count or until date to determine the end of the recurrence (regenerate if count is NULL). """ + start_dt = ensure_aware_datetime(start_datetime) + until = ensure_aware_datetime(until) if until else None + six_months_later = datetime.now(timezone.utc) + timedelta(days=180) orig_until = None @@ -36,7 +41,7 @@ def add_recurrence_rule(db, event_id: int, frequency: FrequencyType, new_rule = RecurrenceRule( frequency=frequency, interval=interval, - start_datetime=start_datetime, + start_datetime=start_dt, count=count, until=until, event_id=event_id, diff --git a/app/services/ical.py b/app/services/ical.py index 5d942ce..b3a9214 100644 --- a/app/services/ical.py +++ b/app/services/ical.py @@ -1,6 +1,7 @@ from icalendar import Calendar from app.utils.date import _ensure_aware, _parse_iso, decoded_dt_with_tz, infer_semester_from_datetime, parsed_httpdate_to_dt +from zoneinfo import ZoneInfo from datetime import datetime, timedelta, timezone, date from typing import Dict, List, Optional @@ -23,6 +24,50 @@ LOOKAHEAD_DAYS = 180 # window for generating occurrences +def normalize_ics_datetime(dt, calendar_tz: ZoneInfo): + """ + Normalize datetime parsed from ICS. + + Rules: + - If dt is floating (tzinfo is None): interpret in calendar_tz + - If dt has tzinfo: trust it (do NOT re-localize) + - Convert to UTC only at persistence time, DO NOT convert to UTC here + """ + if dt.tzinfo is None: + # Floating time β†’ calendar local time + dt = dt.replace(tzinfo=calendar_tz) + + return dt + +def get_calendar_timezone(cal: Calendar) -> Optional[ZoneInfo]: + """ + Derive calendar timezone from an ICS Calendar object. + Should work for Google Calendar, Apple Calendar, Outlook, most RFC-compliant ICS feeds. + + Priority: + 1. X-WR-TIMEZONE (Google, Apple) + 2. VTIMEZONE TZID + """ + + # X-WR-TIMEZONE (most common for Google Calendar) + tzname = cal.get("X-WR-TIMEZONE") + if tzname: + try: + return ZoneInfo(str(tzname)) + except Exception: + pass + + # VTIMEZONE component + for component in cal.walk("VTIMEZONE"): + tzid = component.get("TZID") + if tzid: + try: + return ZoneInfo(str(tzid)) + except Exception: + pass + + return None + def import_ical_feed_using_helpers( db_session, ical_text_or_url: str, @@ -51,6 +96,11 @@ def import_ical_feed_using_helpers( "error": "ICAL_PARSE_ERROR", "message": "The calendar data could not be parsed as a valid iCal file.", } + + calendar_tz = ( + get_calendar_timezone(cal) + or ZoneInfo("UTC") # last-resort fallback + ) event_ids = [] @@ -74,6 +124,7 @@ def import_ical_feed_using_helpers( event_id = _process_uid_group_with_helpers( db_session=db_session, uid=uid, + calendar_tz=calendar_tz, components=components, now=now, horizon=horizon, @@ -205,6 +256,7 @@ def sync_ical_source(db: Session, source_id: int) -> str: def _process_uid_group_with_helpers( db_session, uid: str, + calendar_tz: ZoneInfo, components: List, now: datetime, horizon: datetime, @@ -236,8 +288,11 @@ def _process_uid_group_with_helpers( # Base fields - dtstart = decoded_dt_with_tz(base, "DTSTART") # aware datetime or date - dtend = decoded_dt_with_tz(base, "DTEND") + raw_dtstart = decoded_dt_with_tz(base, "DTSTART") + raw_dtend = decoded_dt_with_tz(base, "DTEND") + dtstart = normalize_ics_datetime(raw_dtstart, calendar_tz) + dtend = normalize_ics_datetime(raw_dtend, calendar_tz) if raw_dtend else None + is_all_day = _is_all_day_component(base) # Convert to ISO strings for your helper @@ -326,7 +381,11 @@ def _process_uid_group_with_helpers( # NOTE: all datetimes passed as ISO until_val = (rrule.get("UNTIL") or [None])[0] # icalendar may give UNTIL as date/datetime; convert to ISO string if present - until_iso = _to_iso_for_helper(until_val, _looks_like_date(until_val)) if until_val else None + if until_val: + until_dt = normalize_ics_datetime(until_val, calendar_tz) + until_iso = until_dt.isoformat() + else: + until_iso = None by_day = rrule.get("BYDAY", []) if isinstance(by_day, str): by_day = [by_day] # Wrap in list if single string @@ -399,9 +458,10 @@ def _process_uid_group_with_helpers( # Iterate safely for ex in exdate_entries: for ex_date in ex.dts: + ex_dt = normalize_ics_datetime(ex_date.dt, calendar_tz) db_session.add(RecurrenceExdate( rrule_id=rule.id, - exdate=_ensure_aware(ex_date.dt).astimezone(timezone.utc) + exdate=ex_dt.astimezone(timezone.utc) )) # ---- Safe RDATE normalization ---- @@ -434,9 +494,10 @@ def _process_uid_group_with_helpers( rid = oc.get("RECURRENCE-ID") if not rid: continue + rid_dt = normalize_ics_datetime(rid.dt, calendar_tz) db_session.add(EventOverride( rrule_id=rule.id, - recurrence_date=_ensure_aware(rid.dt), + recurrence_date=rid_dt, new_start=decoded_dt_with_tz(oc, "DTSTART"), new_end=decoded_dt_with_tz(oc, "DTEND"), new_title=str(oc.get("SUMMARY") or None), @@ -462,7 +523,7 @@ def _process_uid_group_with_helpers( if changed or not _has_occurrence(db_session, event.id, _ensure_aware(dtstart).astimezone(timezone.utc), _ensure_aware(dtend).astimezone(timezone.utc) if dtend else _ensure_aware(dtstart).astimezone(timezone.utc)): # Write a single occurrence via your helper - event_saved_at = getattr(event, "last_updated_at", datetime.utcnow()) + event_saved_at = getattr(event, "last_updated_at", datetime.now(timezone.utc)) save_event_occurrence( db_session, diff --git a/app/utils/date.py b/app/utils/date.py index 41483f9..be674c0 100644 --- a/app/utils/date.py +++ b/app/utils/date.py @@ -2,6 +2,7 @@ from typing import Dict, List, Optional from zoneinfo import ZoneInfo from email.utils import parsedate_to_datetime +from dateutil.parser import isoparse DEFAULT_TZ = ZoneInfo("America/New_York") # pick what’s right for your feed @@ -97,6 +98,15 @@ def _ensure_aware(dt): return dt.replace(tzinfo=timezone.utc) return dt +def ensure_aware_datetime(dt): + if isinstance(dt, str): + dt = isoparse(dt) + + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + + return dt + def _parse_iso(iso_str: Optional[str]) -> Optional[datetime]: if not iso_str: return None diff --git a/tests/conftest.py b/tests/conftest.py index 92a9aaf..a754b0b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -71,3 +71,4 @@ def forbid_real_search_calls(mocker): from tests.factories.admin_factory import * from tests.factories.course_factory import * from tests.factories.course_agent_state_factory import * +from tests.factories.category_factory import * diff --git a/tests/factories/category_factory.py b/tests/factories/category_factory.py new file mode 100644 index 0000000..1ca4966 --- /dev/null +++ b/tests/factories/category_factory.py @@ -0,0 +1,15 @@ +import pytest +from app.models.models import Category + +@pytest.fixture +def category_factory(db): + def create_category(**kwargs): + category = Category( + name=kwargs.pop("name", "Test Category"), + org_id=kwargs.pop("org_id"), + **kwargs, + ) + db.add(category) + db.commit() + return category + return create_category diff --git a/tests/ical/test_calendar_timezone.py b/tests/ical/test_calendar_timezone.py new file mode 100644 index 0000000..d9933fa --- /dev/null +++ b/tests/ical/test_calendar_timezone.py @@ -0,0 +1,159 @@ +from icalendar import Calendar +from zoneinfo import ZoneInfo +from datetime import datetime + +from app.services.ical import ( + get_calendar_timezone, + normalize_ics_datetime, + import_ical_feed_using_helpers, +) +from app.models.models import Event, EventOccurrence + + +def test_get_calendar_timezone_from_x_wr(): + ics = """BEGIN:VCALENDAR +VERSION:2.0 +X-WR-TIMEZONE:America/New_York +BEGIN:VEVENT +UID:test-1 +DTSTART:20260914T180000 +END:VEVENT +END:VCALENDAR +""" + cal = Calendar.from_ical(ics) + tz = get_calendar_timezone(cal) + + assert tz == ZoneInfo("America/New_York") + + +def test_normalize_floating_dtstart(): + raw = datetime(2026, 9, 14, 18, 0) # floating + tz = ZoneInfo("America/New_York") + + dt = normalize_ics_datetime(raw, tz) + + assert dt.tzinfo == tz + assert dt.hour == 18 + + +def test_gcal_import_event_time(db, org_factory, category_factory, user_factory): + org = org_factory() + category = category_factory(org_id=org.id) + user = user_factory() + + ics = """BEGIN:VCALENDAR +VERSION:2.0 +X-WR-TIMEZONE:America/New_York +BEGIN:VEVENT +UID:test-gcal-1 +SUMMARY:Test Event +DTSTART:20260914T180000 +DTEND:20260914T220000 +END:VEVENT +END:VCALENDAR +""" + + result = import_ical_feed_using_helpers( + db_session=db, + ical_text_or_url=ics, + org_id=org.id, + category_id=category.id, + default_event_type="CLUB", + user_id=user.id, + ) + + assert result["success"] is True + + event = db.query(Event).filter_by(ical_uid="test-gcal-1").one() + + # Stored in UTC + assert event.start_datetime.tzinfo is not None + assert event.start_datetime.hour == 22 # 18 EDT β†’ 22 UTC + + # Convert back to local + local = event.start_datetime.astimezone(ZoneInfo("America/New_York")) + assert local.hour == 18 + + +def test_gcal_weekly_recurrence_hour_stable(db, org_factory, category_factory, user_factory): + org = org_factory() + category = category_factory(org_id=org.id) + user = user_factory() + + ics = """BEGIN:VCALENDAR +VERSION:2.0 +X-WR-TIMEZONE:America/New_York +BEGIN:VEVENT +UID:test-gcal-rrule +SUMMARY:Weekly Rehearsal +DTSTART:20260914T180000 +DTEND:20260914T200000 +RRULE:FREQ=WEEKLY;COUNT=3 +END:VEVENT +END:VCALENDAR +""" + + import_ical_feed_using_helpers( + db_session=db, + ical_text_or_url=ics, + org_id=org.id, + category_id=category.id, + default_event_type="CLUB", + user_id=user.id, + ) + + occs = ( + db.query(EventOccurrence) + .order_by(EventOccurrence.start_datetime) + .all() + ) + + assert len(occs) == 3 + + for occ in occs: + local = occ.start_datetime.astimezone(ZoneInfo("America/New_York")) + assert local.hour == 18 + + +def test_recurrence_override_matches_exact_hour(db, org_factory, category_factory, user_factory): + org = org_factory() + category = category_factory(org_id=org.id) + user = user_factory() + + ics = """BEGIN:VCALENDAR +VERSION:2.0 +X-WR-TIMEZONE:America/New_York +BEGIN:VEVENT +UID:test-override +DTSTART:20260914T180000 +RRULE:FREQ=WEEKLY;COUNT=2 +END:VEVENT +BEGIN:VEVENT +UID:test-override +RECURRENCE-ID:20260921T180000 +DTSTART:20260921T190000 +END:VEVENT +END:VCALENDAR +""" + + import_ical_feed_using_helpers( + db_session=db, + ical_text_or_url=ics, + org_id=org.id, + category_id=category.id, + default_event_type="CLUB", + user_id=user.id, + ) + + occs = ( + db.query(EventOccurrence) + .order_by(EventOccurrence.start_datetime) + .all() + ) + + hours = [ + occ.start_datetime.astimezone(ZoneInfo("America/New_York")).hour + for occ in occs + ] + + assert hours == [18, 19] From f4b6d991b89f7272953c21798536cfed7b580768 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Fri, 9 Jan 2026 01:03:47 -0500 Subject: [PATCH 4/7] test: added test for rrule timezone in soc scraper --- tests/scraper_events/test_timezone.py | 64 ++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/tests/scraper_events/test_timezone.py b/tests/scraper_events/test_timezone.py index 827b408..7150261 100644 --- a/tests/scraper_events/test_timezone.py +++ b/tests/scraper_events/test_timezone.py @@ -110,4 +110,66 @@ def test_dst_transition_new_york(): 2026, 3, 30, 9, 0, tzinfo=ZoneInfo("America/New_York") ) - assert start == expected \ No newline at end of file + assert start == expected + +def test_event_and_rrule_timezone_consistency(): + soc = make_soc("Pittsburgh, Pennsylvania") + + org_id_by_key = {("15112", "Spring_26"): 1} + category_id_by_org = {1: 10} + + events, rrules = build_events_and_rrules( + [soc], + org_id_by_key, + category_id_by_org, + ) + + event = events[0] + rrule = rrules[0] + + # --- Event checks --- + assert event["start_datetime"].tzinfo is not None + assert event["end_datetime"].tzinfo is not None + assert event["start_datetime"].tzinfo.key == "America/New_York" + + # --- RRULE checks --- + assert rrule["start_datetime"].tzinfo is not None + assert rrule["start_datetime"].tzinfo.key == "America/New_York" + + # RRULE should match event local wall-clock time + assert rrule["start_datetime"].hour == event["start_datetime"].hour + assert rrule["start_datetime"].minute == event["start_datetime"].minute + +def test_rrule_dst_transition_new_york(): + soc = make_soc("Pittsburgh, Pennsylvania") + + org_id_by_key = {("15112", "Spring_26"): 1} + category_id_by_org = {1: 10} + + # ---- Pre-DST (EST) ---- + soc.sem_start = datetime(2026, 1, 12) # EST + events, rrules = build_events_and_rrules( + [soc], + org_id_by_key, + category_id_by_org, + ) + + rrule_start = rrules[0]["start_datetime"] + + assert rrule_start.utcoffset().total_seconds() == -5 * 3600 + assert rrule_start.isoformat().endswith("-05:00") + assert rrule_start.hour == 9 + + # ---- Post-DST (EDT) ---- + soc.sem_start = datetime(2026, 3, 30) # EDT + events, rrules = build_events_and_rrules( + [soc], + org_id_by_key, + category_id_by_org, + ) + + rrule_start = rrules[0]["start_datetime"] + + assert rrule_start.utcoffset().total_seconds() == -4 * 3600 + assert rrule_start.isoformat().endswith("-04:00") + assert rrule_start.hour == 9 From 5766e4b8c5a0dda1ddb89c0241c99810dcee05b8 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Fri, 9 Jan 2026 01:46:32 -0500 Subject: [PATCH 5/7] test: added tests for deleting events for a given calendar source --- app/api/events.py | 36 ++++- app/models/calend | 0 app/models/calendar_source.py | 37 +++++- app/services/ical.py | 125 +++++++++++++++++- tests/conftest.py | 2 + tests/factories/calendar_source_factory.py | 42 ++++++ tests/factories/category_factory.py | 2 +- tests/factories/event_factory.py | 52 ++++++++ tests/factories/org_factory.py | 14 +- tests/factories/user_factory.py | 2 +- .../calendar_source/test_calendar_source.py | 30 +++++ 11 files changed, 334 insertions(+), 8 deletions(-) create mode 100644 app/models/calend create mode 100644 tests/factories/calendar_source_factory.py create mode 100644 tests/factories/event_factory.py create mode 100644 tests/models/calendar_source/test_calendar_source.py diff --git a/app/api/events.py b/app/api/events.py index f7a116b..40e1c1c 100644 --- a/app/api/events.py +++ b/app/api/events.py @@ -15,7 +15,7 @@ from datetime import datetime, timezone from sqlalchemy import cast, Date, or_, delete, select -from app.services.ical import import_ical_feed_using_helpers +from app.services.ical import delete_events_for_calendar_source, import_ical_feed_using_helpers from app.errors.ical import ICalFetchError from app.models.calendar_source import create_calendar_source @@ -240,6 +240,40 @@ def read_gcal_link(): "message": str(e), }), 500 +@events_bp.route("/delete_events_and_deactivate_calendar", methods=["DELETE"]) +def delete_events_and_deactivate_calendar(): + """Deletes all events associated with a calendar source ID and deactivates it""" + db = g.db + try: + data = request.get_json() + calendar_source_id = data.get("calendar_source_id") + + if not calendar_source_id: + return jsonify({"error": "Missing calendar_source_id"}), 400 + + deleted_event_ids = delete_events_for_calendar_source( + db=db, + calendar_source_id=calendar_source_id, + ) + + db.commit() + + return jsonify({ + "status": "ok", + "calendar_source_id": calendar_source_id, + "deleted_events": len(deleted_event_ids), + "event_ids": deleted_event_ids, + }), 200 + + except ValueError as e: + return jsonify({"error": str(e)}), 404 + + except Exception as e: + import traceback + print("❌ Exception:", traceback.format_exc()) + return jsonify({"error": str(e)}), 500 + + # @events_bp.route("/generate_more_occurrences", methods=["POST"]) # def generate_more_occurrences(): # db = g.db diff --git a/app/models/calend b/app/models/calend new file mode 100644 index 0000000..e69de29 diff --git a/app/models/calendar_source.py b/app/models/calendar_source.py index feca4a0..1bc2271 100644 --- a/app/models/calendar_source.py +++ b/app/models/calendar_source.py @@ -1,5 +1,7 @@ from app.models.models import CalendarSource from typing import Optional +from sqlalchemy.orm import Session +from datetime import datetime, timezone def create_calendar_source( db_session, @@ -26,4 +28,37 @@ def create_calendar_source( db_session.add(calendar_source) db_session.flush() - return calendar_source \ No newline at end of file + return calendar_source + + +def deactivate_calendar_source( + db: Session, + calendar_source_id: int, +) -> CalendarSource: + """ + Mark a CalendarSource as inactive. + + - Acquires a row-level lock + - Updates active flag and updated_at + - Does NOT commit (caller controls transaction) + + Returns: + The updated CalendarSource + """ + + calendar_source = ( + db.query(CalendarSource) + .filter(CalendarSource.id == calendar_source_id) + .with_for_update() + .one_or_none() + ) + + if not calendar_source: + raise ValueError("CalendarSource not found") + + if calendar_source.active: + calendar_source.active = False + calendar_source.updated_at = datetime.now(timezone.utc) + + db.flush() + return calendar_source diff --git a/app/services/ical.py b/app/services/ical.py index b3a9214..0009b8d 100644 --- a/app/services/ical.py +++ b/app/services/ical.py @@ -1,7 +1,9 @@ from icalendar import Calendar +from app.models.calendar_source import deactivate_calendar_source from app.utils.date import _ensure_aware, _parse_iso, decoded_dt_with_tz, infer_semester_from_datetime, parsed_httpdate_to_dt from zoneinfo import ZoneInfo +from sqlalchemy import select, delete from datetime import datetime, timedelta, timezone, date from typing import Dict, List, Optional @@ -9,7 +11,6 @@ from requests.exceptions import HTTPError, Timeout, ConnectionError from app.errors.ical import ICalFetchError import os -from sqlalchemy import select from sqlalchemy.orm import Session import hashlib @@ -647,3 +648,125 @@ def _pick_base_component(bases: List): if with_rrule: return with_rrule[0] return sorted(bases, key=lambda b: decoded_dt_with_tz(b, "DTSTART"))[0] + +from app.models.models import ( + Event, + CalendarSource, + EventOccurrence, + EventTag, + UserSavedEvent, + Academic, + Career, + Club, + RecurrenceRule, + RecurrenceExdate, + RecurrenceRdate, + EventOverride, + RecurrenceOverride, +) + + +def delete_events_for_calendar_source( + db: Session, + calendar_source_id: int, +) -> list[int]: + """ + Delete all events associated with a CalendarSource, including + all dependent rows, and deactivate the CalendarSource. + + Returns: + List of deleted event IDs. + """ + + # Lock and fetch calendar source + calendar_source = ( + db.query(CalendarSource) + .filter(CalendarSource.id == calendar_source_id) + .with_for_update() + .one_or_none() + ) + + if not calendar_source: + raise ValueError("CalendarSource not found") + + source_url = calendar_source.url + + # Collect event IDs once + event_id_list = ( + db.query(Event.id) + .filter(Event.source_url == source_url) + .all() + ) + event_id_list = [eid for (eid,) in event_id_list] + + if not event_id_list: + # Still deactivate calendar source + calendar_source.active = False + calendar_source.updated_at = datetime.now(timezone.utc) + db.flush() + return [] + + event_id_subq = select(Event.id).where(Event.id.in_(event_id_list)) + + # Delete Event-level children + db.execute( + delete(EventOccurrence).where(EventOccurrence.event_id.in_(event_id_subq)) + ) + db.execute( + delete(EventTag).where(EventTag.event_id.in_(event_id_subq)) + ) + db.execute( + delete(UserSavedEvent).where(UserSavedEvent.event_id.in_(event_id_subq)) + ) + db.execute( + delete(Academic).where(Academic.event_id.in_(event_id_subq)) + ) + db.execute( + delete(Career).where(Career.event_id.in_(event_id_subq)) + ) + db.execute( + delete(Club).where(Club.event_id.in_(event_id_subq)) + ) + + # Handle recurrence hierarchy + rrule_ids = ( + select(RecurrenceRule.id) + .where(RecurrenceRule.event_id.in_(event_id_subq)) + ) + + db.execute( + delete(RecurrenceExdate).where( + RecurrenceExdate.rrule_id.in_(rrule_ids) + ) + ) + db.execute( + delete(RecurrenceRdate).where( + RecurrenceRdate.rrule_id.in_(rrule_ids) + ) + ) + db.execute( + delete(EventOverride).where( + EventOverride.rrule_id.in_(rrule_ids) + ) + ) + db.execute( + delete(RecurrenceOverride).where( + RecurrenceOverride.rrule_id.in_(rrule_ids) + ) + ) + db.execute( + delete(RecurrenceRule).where( + RecurrenceRule.id.in_(rrule_ids) + ) + ) + + # Delete events + db.execute( + delete(Event).where(Event.id.in_(event_id_subq)) + ) + + # Deactivate calendar source + deactivate_calendar_source(db, calendar_source_id) + + db.flush() + return event_id_list diff --git a/tests/conftest.py b/tests/conftest.py index a754b0b..b52e927 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,3 +72,5 @@ def forbid_real_search_calls(mocker): from tests.factories.course_factory import * from tests.factories.course_agent_state_factory import * from tests.factories.category_factory import * +from tests.factories.event_factory import * +from tests.factories.calendar_source_factory import * diff --git a/tests/factories/calendar_source_factory.py b/tests/factories/calendar_source_factory.py new file mode 100644 index 0000000..4c53d16 --- /dev/null +++ b/tests/factories/calendar_source_factory.py @@ -0,0 +1,42 @@ +import pytest +from datetime import datetime, timezone +import uuid + +from app.models.models import CalendarSource + + +@pytest.fixture +def calendar_source_factory(db, org_factory, category_factory): + def create_calendar_source(**kwargs): + org = kwargs.pop("org", None) or org_factory() + category = kwargs.pop("category", None) or category_factory(org_id=org.id) + + source = CalendarSource( + url=kwargs.pop( + "url", + f"https://calendar.google.com/calendar/ical/test/{uuid.uuid4()}.ics" + ), + org_id=kwargs.pop("org_id", org.id), + category_id=kwargs.pop("category_id", category.id), + active=kwargs.pop("active", True), + sync_mode=kwargs.pop("sync_mode", "delta"), + fetch_interval_seconds=kwargs.pop("fetch_interval_seconds", 3600), + default_event_type=kwargs.pop("default_event_type", "CLUB"), + notes=kwargs.pop("notes", None), + created_by_user_id=kwargs.pop("created_by_user_id", None), + last_sync_status=kwargs.pop("last_sync_status", None), + content_hash=kwargs.pop("content_hash", None), + etag=kwargs.pop("etag", None), + last_modified_hdr=kwargs.pop("last_modified_hdr", None), + locked_at=kwargs.pop("locked_at", None), + lock_owner=kwargs.pop("lock_owner", None), + created_at=kwargs.pop("created_at", datetime.now(timezone.utc)), + updated_at=kwargs.pop("updated_at", datetime.now(timezone.utc)), + **kwargs, + ) + + db.add(source) + db.flush() + return source + + return create_calendar_source diff --git a/tests/factories/category_factory.py b/tests/factories/category_factory.py index 1ca4966..e214656 100644 --- a/tests/factories/category_factory.py +++ b/tests/factories/category_factory.py @@ -10,6 +10,6 @@ def create_category(**kwargs): **kwargs, ) db.add(category) - db.commit() + db.flush() return category return create_category diff --git a/tests/factories/event_factory.py b/tests/factories/event_factory.py new file mode 100644 index 0000000..138487d --- /dev/null +++ b/tests/factories/event_factory.py @@ -0,0 +1,52 @@ +import pytest +from datetime import datetime, timezone, timedelta + +from app.models.models import Event + +import uuid + +@pytest.fixture +def event_factory(db, org_factory, category_factory): + def create_event(**kwargs): + org = kwargs.pop("org", None) or org_factory() + category = kwargs.pop("category", None) or category_factory(org_id=org.id) + + start = kwargs.pop( + "start_datetime", + datetime(2026, 1, 15, 18, 0, tzinfo=timezone.utc), + ) + + event = Event( + org_id=org.id, + category_id=category.id, + title=kwargs.pop("title", "Test Event"), + description=kwargs.pop("description", None), + location=kwargs.pop("location", "Test Location"), + + start_datetime=start, + end_datetime=kwargs.pop("end_datetime", start + timedelta(hours=2)), + is_all_day=kwargs.pop("is_all_day", False), + semester=kwargs.pop("semester", "Spring_26"), + + user_edited=kwargs.pop("user_edited", []), + source_url=kwargs.pop("source_url", None), + event_type=kwargs.pop("event_type", "CLUB"), + ical_uid=kwargs.pop( + "ical_uid", + f"test-{uuid.uuid4()}" + ), + ical_sequence=kwargs.pop("ical_sequence", 0), + ical_last_modified=kwargs.pop("ical_last_modified", None), + + last_updated_at=kwargs.pop("last_updated_at", datetime.now(timezone.utc)), + ) + + db.add(event) + db.flush() + return event + + def create_batch(n, **kwargs): + return [create_event(**kwargs) for _ in range(n)] + + create_event.create_batch = create_batch + return create_event \ No newline at end of file diff --git a/tests/factories/org_factory.py b/tests/factories/org_factory.py index fb2443b..f0b0afb 100644 --- a/tests/factories/org_factory.py +++ b/tests/factories/org_factory.py @@ -1,14 +1,22 @@ import pytest from app.models.models import Organization +# tests/factories/org_factory.py +import pytest +import uuid +from app.models.models import Organization + @pytest.fixture def org_factory(db): def create_org(**kwargs): org = Organization( - name=kwargs.pop("name", "Test Org"), - **kwargs, + name=kwargs.pop("name", f"Test Org {uuid.uuid4()}"), + tags=kwargs.pop("tags", None), + description=kwargs.pop("description", None), + type=kwargs.pop("type", None), ) db.add(org) - db.commit() + db.flush() return org return create_org + diff --git a/tests/factories/user_factory.py b/tests/factories/user_factory.py index 3613a34..c421171 100644 --- a/tests/factories/user_factory.py +++ b/tests/factories/user_factory.py @@ -10,6 +10,6 @@ def create_user(**kwargs): **kwargs, ) db.add(user) - db.commit() + db.flush() return user return create_user diff --git a/tests/models/calendar_source/test_calendar_source.py b/tests/models/calendar_source/test_calendar_source.py new file mode 100644 index 0000000..02515e0 --- /dev/null +++ b/tests/models/calendar_source/test_calendar_source.py @@ -0,0 +1,30 @@ +from app.models import Event, CalendarSource +from app.models.calendar_source import deactivate_calendar_source +from app.services.ical import delete_events_for_calendar_source + + +def test_delete_events_for_calendar_source(db, calendar_source_factory, event_factory): + source = calendar_source_factory(active=True) + events = event_factory.create_batch( + 3, + source_url=source.url, + ) + + deleted_ids = delete_events_for_calendar_source(db, source.id) + db.commit() + + assert len(deleted_ids) == 3 + assert db.query(Event).count() == 0 + + refreshed = db.get(CalendarSource, source.id) + assert refreshed.active is False + + +def test_deactivate_calendar_source(db, calendar_source_factory): + source = calendar_source_factory(active=True) + + deactivate_calendar_source(db, source.id) + db.commit() + + refreshed = db.get(CalendarSource, source.id) + assert refreshed.active is False From 2fcfd407fd1eec5e29ebe8049e692226ce0c3072 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Fri, 9 Jan 2026 02:34:48 -0500 Subject: [PATCH 6/7] feat: prevents duplicate imports, events are uniquely identified by calendar source and ical uid --- README.md | 1 + app/api/events.py | 34 ++++---- app/models/models.py | 11 ++- app/services/ical.py | 21 +++-- ...d803e9c36ef_avoid_duplicate_ical_import.py | 38 +++++++++ tests/factories/event_factory.py | 2 + tests/ical/test_calendar_timezone.py | 34 ++++---- ...st_ical_import_idempotent_no_duplicates.py | 84 +++++++++++++++++++ .../calendar_source/test_calendar_source.py | 6 +- 9 files changed, 188 insertions(+), 43 deletions(-) create mode 100644 migrations/versions/ad803e9c36ef_avoid_duplicate_ical_import.py create mode 100644 tests/ical/test_ical_import_idempotent_no_duplicates.py diff --git a/README.md b/README.md index 2b25c66..fab4e42 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ APP_ENV=production alembic upgrade head - `upgrade head` = run migrations + bump version. - `stamp head` = bump version only, no migrations executed. +- `alembic downgrade -1` = undo the latest migration ## 6. Course data - see scraper readme instructions on `export_soc`. Edit Railway cron job at [this link](https://railway.com/project/065650eb-f1b3-4b72-9bdc-e0d0a9457205?environmentId=5b57ba27-25e6-44e5-91b8-6b602ae891f3 diff --git a/app/api/events.py b/app/api/events.py index 40e1c1c..ed827db 100644 --- a/app/api/events.py +++ b/app/api/events.py @@ -170,21 +170,6 @@ def read_gcal_link(): if not user: return jsonify({"error": "User not found"}), 404 - ics_message = import_ical_feed_using_helpers( - db_session=db, - ical_text_or_url=gcal_link, - org_id=org_id, - category_id=category_id, - semester=semester, - default_event_type=event_type, - user_id=user.id, - source_url=gcal_link - ) - print(ics_message) - # Handle parse-level failure - if ics_message.get("success") is False: - return jsonify(ics_message), 400 - # Ensure CalendarSource exists (category ⟢ many sources), # store the gcal link if it is not already stored calendar_source = ( @@ -213,6 +198,25 @@ def read_gcal_link(): calendar_source.notes = notes if event_type is not None: calendar_source.default_event_type = event_type + if calendar_source.active is False: + calendar_source.active = True + db.flush() + + ics_message = import_ical_feed_using_helpers( + db_session=db, + ical_text_or_url=gcal_link, + org_id=org_id, + category_id=category_id, + semester=semester, + default_event_type=event_type, + user_id=user.id, + source_url=gcal_link, + calendar_source_id=calendar_source.id + ) + print(ics_message) + # Handle parse-level failure + if ics_message.get("success") is False: + return jsonify(ics_message), 400 db.commit() # Only commit if all succeeded return jsonify({ diff --git a/app/models/models.py b/app/models/models.py index df35fd7..53ac6a3 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -264,6 +264,7 @@ class CalendarSource(Base): updated_at: Mapped[datetime.datetime] = mapped_column(DateTime(True), server_default=text('now()')) created_by_user_id: Mapped[Optional[int]] = mapped_column(BigInteger) # relationships + events: Mapped[List['Event']] = relationship("Event", back_populates="calendar_source", cascade="all, delete-orphan", passive_deletes=True) category: Mapped['Category'] = relationship('Category', back_populates='calendar_sources') org: Mapped['Organization'] = relationship('Organization', back_populates='calendar_sources') @@ -309,9 +310,10 @@ class SyncedEvent(Base): class Event(Base): __tablename__ = 'events' __table_args__ = ( + ForeignKeyConstraint(['calendar_source_id'], ['calendar_sources.id'], ondelete='SET NULL', name='events_calendar_source_id_fkey'), ForeignKeyConstraint(['category_id'], ['categories.id'], ondelete='CASCADE', name='events_category_id_fkey'), ForeignKeyConstraint(['org_id'], ['organizations.id'], ondelete='CASCADE', name='events_org_id_fkey'), - # PrimaryKeyConstraint('id', name='events_pkey') + # Dedupe SOC events only UniqueConstraint( "org_id", "title", @@ -320,7 +322,9 @@ class Event(Base): "end_datetime", "location", name="events_unique_soc", - ) + ), + # Hard guarantee for ICS imports + UniqueConstraint("calendar_source_id", "ical_uid", name="events_unique_calendar_uid"), ) id: Mapped[int] = mapped_column(BigInteger, Identity(start=1, increment=1, minvalue=1, maxvalue=9223372036854775807, cycle=False, cache=1), primary_key=True) @@ -335,6 +339,8 @@ class Event(Base): user_edited: Mapped[Optional[list]] = mapped_column(ARRAY(BigInteger())) org_id: Mapped[int] = mapped_column(BigInteger) category_id: Mapped[int] = mapped_column(BigInteger) + calendar_source_id: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True, index=True) + description: Mapped[Optional[str]] = mapped_column(Text) source_url: Mapped[Optional[str]] = mapped_column(Text) event_type: Mapped[Optional[str]] = mapped_column(Text) @@ -345,6 +351,7 @@ class Event(Base): occurrences_valid_through: Mapped[Optional[datetime.datetime]] = mapped_column(DateTime(True)) last_occurrence_build_at: Mapped[Optional[datetime.datetime]] = mapped_column(DateTime(True)) + calendar_source: Mapped[Optional['CalendarSource']] = relationship('CalendarSource', back_populates='events') category: Mapped['Category'] = relationship('Category', back_populates='events') org: Mapped['Organization'] = relationship('Organization', back_populates='events') event_occurrences: Mapped[List['EventOccurrence']] = relationship('EventOccurrence', back_populates='event', passive_deletes=True) diff --git a/app/services/ical.py b/app/services/ical.py index 0009b8d..c8653f2 100644 --- a/app/services/ical.py +++ b/app/services/ical.py @@ -75,6 +75,7 @@ def import_ical_feed_using_helpers( *, org_id: int, category_id: int, + calendar_source_id: int, semester: Optional[str] = None, default_event_type: Optional[str] = None, # e.g. "CLUB"/"ACADEMIC"/"CAREER"/"OH"/NONE source_url: Optional[str] = None, @@ -134,14 +135,15 @@ def import_ical_feed_using_helpers( default_event_type=default_event_type, source_url=source_url, user_id=user_id, - semester=semester + semester=semester, + calendar_source_id=calendar_source_id ) if event_id: event_ids.append(event_id) # 4) Optionally delete events no longer present if delete_missing_uids: - existing_uids = {row[0] for row in db_session.query(Event.ical_uid).filter(Event.org_id==org_id, Event.category_id==category_id).all()} + existing_uids = {row[0] for row in db_session.query(Event.ical_uid).filter(Event.calendar_source_id == calendar_source_id).all()} missing = list(existing_uids - incoming_uids) if missing: db_session.query(Event).filter(Event.ical_uid.in_(missing)).delete(synchronize_session=False) @@ -212,6 +214,7 @@ def sync_ical_source(db: Session, source_id: int) -> str: result = import_ical_feed_using_helpers( db_session=db, ical_text_or_url=body, + calendar_source_id=source.id, org_id=source.org_id, category_id=source.category_id, default_event_type=source.default_event_type, @@ -255,20 +258,20 @@ def sync_ical_source(db: Session, source_id: int) -> str: def _process_uid_group_with_helpers( + *, db_session, uid: str, calendar_tz: ZoneInfo, components: List, now: datetime, horizon: datetime, - *, + calendar_source_id: int, org_id: int, category_id: int, default_event_type: Optional[str], source_url: Optional[str], - # user_edited: Optional[bool], user_id: Optional[int], - semester: Optional[str] + semester: Optional[str], ): # Split: base components (no RECURRENCE-ID) vs overrides base_candidates = [c for c in components if not c.get("RECURRENCE-ID")] @@ -310,7 +313,7 @@ def _process_uid_group_with_helpers( # --- DEDUPE LOOKUP (scope by org+category; add source_id if you have it) --- existing = (db_session.query(Event) - .filter_by(org_id=org_id, category_id=category_id, ical_uid=uid) + .filter_by(calendar_source_id=calendar_source_id, ical_uid=uid) .first()) changed = _should_update(existing, seq, last_modified) @@ -332,6 +335,7 @@ def _process_uid_group_with_helpers( existing.source_url = source_url existing.event_type = default_event_type existing.semester = event_semester + existing.calendar_source_id = calendar_source_id user_edited = existing.user_edited if existing.user_edited else [] user_edited.append(user_id) @@ -365,6 +369,7 @@ def _process_uid_group_with_helpers( semester=event_semester, user_edited=[user_id] ) + event.calendar_source_id = calendar_source_id db_session.flush() # Add iCal metadata directly on the persisted model event.ical_uid = uid @@ -689,12 +694,10 @@ def delete_events_for_calendar_source( if not calendar_source: raise ValueError("CalendarSource not found") - source_url = calendar_source.url - # Collect event IDs once event_id_list = ( db.query(Event.id) - .filter(Event.source_url == source_url) + .filter(Event.calendar_source_id == calendar_source_id) .all() ) event_id_list = [eid for (eid,) in event_id_list] diff --git a/migrations/versions/ad803e9c36ef_avoid_duplicate_ical_import.py b/migrations/versions/ad803e9c36ef_avoid_duplicate_ical_import.py new file mode 100644 index 0000000..ce955c6 --- /dev/null +++ b/migrations/versions/ad803e9c36ef_avoid_duplicate_ical_import.py @@ -0,0 +1,38 @@ +"""avoid duplicate ical import + +Revision ID: ad803e9c36ef +Revises: 3905128935dc +Create Date: 2026-01-09 02:25:27.352329 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'ad803e9c36ef' +down_revision: Union[str, Sequence[str], None] = '3905128935dc' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('events', sa.Column('calendar_source_id', sa.BigInteger(), nullable=True)) + op.create_unique_constraint('events_unique_calendar_uid', 'events', ['calendar_source_id', 'ical_uid']) + op.create_index(op.f('ix_events_calendar_source_id'), 'events', ['calendar_source_id'], unique=False) + op.create_foreign_key('events_calendar_source_id_fkey', 'events', 'calendar_sources', ['calendar_source_id'], ['id'], ondelete='SET NULL') + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint('events_calendar_source_id_fkey', 'events', type_='foreignkey') + op.drop_index(op.f('ix_events_calendar_source_id'), table_name='events') + op.drop_constraint('events_unique_calendar_uid', 'events', type_='unique') + op.drop_column('events', 'calendar_source_id') + # ### end Alembic commands ### diff --git a/tests/factories/event_factory.py b/tests/factories/event_factory.py index 138487d..ebe76b4 100644 --- a/tests/factories/event_factory.py +++ b/tests/factories/event_factory.py @@ -10,6 +10,7 @@ def event_factory(db, org_factory, category_factory): def create_event(**kwargs): org = kwargs.pop("org", None) or org_factory() category = kwargs.pop("category", None) or category_factory(org_id=org.id) + calendar_source_id = kwargs.pop("calendar_source_id", None) start = kwargs.pop( "start_datetime", @@ -19,6 +20,7 @@ def create_event(**kwargs): event = Event( org_id=org.id, category_id=category.id, + calendar_source_id=calendar_source_id, title=kwargs.pop("title", "Test Event"), description=kwargs.pop("description", None), location=kwargs.pop("location", "Test Location"), diff --git a/tests/ical/test_calendar_timezone.py b/tests/ical/test_calendar_timezone.py index d9933fa..ee69647 100644 --- a/tests/ical/test_calendar_timezone.py +++ b/tests/ical/test_calendar_timezone.py @@ -36,11 +36,15 @@ def test_normalize_floating_dtstart(): assert dt.hour == 18 -def test_gcal_import_event_time(db, org_factory, category_factory, user_factory): +def test_gcal_import_event_time( + db, org_factory, category_factory, user_factory, calendar_source_factory +): org = org_factory() category = category_factory(org_id=org.id) user = user_factory() + source = calendar_source_factory(org=org, category=category) + ics = """BEGIN:VCALENDAR VERSION:2.0 X-WR-TIMEZONE:America/New_York @@ -58,6 +62,7 @@ def test_gcal_import_event_time(db, org_factory, category_factory, user_factory) ical_text_or_url=ics, org_id=org.id, category_id=category.id, + calendar_source_id=source.id, # βœ… REQUIRED default_event_type="CLUB", user_id=user.id, ) @@ -66,19 +71,20 @@ def test_gcal_import_event_time(db, org_factory, category_factory, user_factory) event = db.query(Event).filter_by(ical_uid="test-gcal-1").one() - # Stored in UTC assert event.start_datetime.tzinfo is not None assert event.start_datetime.hour == 22 # 18 EDT β†’ 22 UTC - # Convert back to local local = event.start_datetime.astimezone(ZoneInfo("America/New_York")) assert local.hour == 18 -def test_gcal_weekly_recurrence_hour_stable(db, org_factory, category_factory, user_factory): +def test_gcal_weekly_recurrence_hour_stable( + db, org_factory, category_factory, user_factory, calendar_source_factory +): org = org_factory() category = category_factory(org_id=org.id) user = user_factory() + source = calendar_source_factory(org=org, category=category) ics = """BEGIN:VCALENDAR VERSION:2.0 @@ -98,16 +104,12 @@ def test_gcal_weekly_recurrence_hour_stable(db, org_factory, category_factory, u ical_text_or_url=ics, org_id=org.id, category_id=category.id, + calendar_source_id=source.id, default_event_type="CLUB", user_id=user.id, ) - occs = ( - db.query(EventOccurrence) - .order_by(EventOccurrence.start_datetime) - .all() - ) - + occs = db.query(EventOccurrence).order_by(EventOccurrence.start_datetime).all() assert len(occs) == 3 for occ in occs: @@ -115,10 +117,13 @@ def test_gcal_weekly_recurrence_hour_stable(db, org_factory, category_factory, u assert local.hour == 18 -def test_recurrence_override_matches_exact_hour(db, org_factory, category_factory, user_factory): +def test_recurrence_override_matches_exact_hour( + db, org_factory, category_factory, user_factory, calendar_source_factory +): org = org_factory() category = category_factory(org_id=org.id) user = user_factory() + source = calendar_source_factory(org=org, category=category) ics = """BEGIN:VCALENDAR VERSION:2.0 @@ -141,15 +146,12 @@ def test_recurrence_override_matches_exact_hour(db, org_factory, category_factor ical_text_or_url=ics, org_id=org.id, category_id=category.id, + calendar_source_id=source.id, default_event_type="CLUB", user_id=user.id, ) - occs = ( - db.query(EventOccurrence) - .order_by(EventOccurrence.start_datetime) - .all() - ) + occs = db.query(EventOccurrence).order_by(EventOccurrence.start_datetime).all() hours = [ occ.start_datetime.astimezone(ZoneInfo("America/New_York")).hour diff --git a/tests/ical/test_ical_import_idempotent_no_duplicates.py b/tests/ical/test_ical_import_idempotent_no_duplicates.py new file mode 100644 index 0000000..e116580 --- /dev/null +++ b/tests/ical/test_ical_import_idempotent_no_duplicates.py @@ -0,0 +1,84 @@ +import pytest +from app.models.models import Event, CalendarSource +from app.services.ical import import_ical_feed_using_helpers +from app.models.calendar_source import create_calendar_source + + +SIMPLE_ICS = """BEGIN:VCALENDAR +VERSION:2.0 +PRODID:-//Test//EN +BEGIN:VEVENT +UID:test-uid-123 +DTSTAMP:20250101T000000Z +DTSTART:20250110T150000Z +DTEND:20250110T160000Z +SUMMARY:Test Event +DESCRIPTION:Hello world +LOCATION:Test Room +END:VEVENT +END:VCALENDAR +""" + + +def test_ical_import_idempotent_no_duplicates(db, org_factory, category_factory, user_factory): + """ + Importing the same ICS twice for the same CalendarSource + must NOT create duplicate Events. + """ + + org = org_factory() + category = category_factory(org_id=org.id) + user = user_factory() + + # Create CalendarSource + calendar_source = create_calendar_source( + db_session=db, + url="https://example.com/test.ics", + org_id=org.id, + category_id=category.id, + active=True, + created_by_user_id=user.id, + ) + db.flush() + + # First import + result1 = import_ical_feed_using_helpers( + db_session=db, + ical_text_or_url=SIMPLE_ICS, + calendar_source_id=calendar_source.id, + org_id=org.id, + category_id=category.id, + user_id=user.id, + source_url=calendar_source.url, + ) + assert result1["success"] is True + + events_after_first = ( + db.query(Event) + .filter(Event.calendar_source_id == calendar_source.id) + .all() + ) + assert len(events_after_first) == 1 + first_event_id = events_after_first[0].id + + # Second import (same ICS) + result2 = import_ical_feed_using_helpers( + db_session=db, + ical_text_or_url=SIMPLE_ICS, + calendar_source_id=calendar_source.id, + org_id=org.id, + category_id=category.id, + user_id=user.id, + source_url=calendar_source.url, + ) + assert result2["success"] is True + + events_after_second = ( + db.query(Event) + .filter(Event.calendar_source_id == calendar_source.id) + .all() + ) + + # THE CRITICAL ASSERTION + assert len(events_after_second) == 1 + assert events_after_second[0].id == first_event_id diff --git a/tests/models/calendar_source/test_calendar_source.py b/tests/models/calendar_source/test_calendar_source.py index 02515e0..be6308d 100644 --- a/tests/models/calendar_source/test_calendar_source.py +++ b/tests/models/calendar_source/test_calendar_source.py @@ -3,10 +3,14 @@ from app.services.ical import delete_events_for_calendar_source -def test_delete_events_for_calendar_source(db, calendar_source_factory, event_factory): +def test_delete_events_for_calendar_source( + db, calendar_source_factory, event_factory +): source = calendar_source_factory(active=True) + events = event_factory.create_batch( 3, + calendar_source_id=source.id, source_url=source.url, ) From 8cdb83cfccbea4919e5132af3e9b83d214e7e596 Mon Sep 17 00:00:00 2001 From: qianxuege Date: Mon, 12 Jan 2026 16:32:55 -0500 Subject: [PATCH 7/7] feat: refactored admin table into admins and admin_categories --- app/models/models.py | 62 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/app/models/models.py b/app/models/models.py index 53ac6a3..237f283 100644 --- a/app/models/models.py +++ b/app/models/models.py @@ -184,25 +184,67 @@ class User(Base): synced_events: Mapped[List['SyncedEvent']] = relationship('SyncedEvent', back_populates='user') user_saved_events: Mapped[List['UserSavedEvent']] = relationship('UserSavedEvent', back_populates='user') - class Admin(Base): __tablename__ = 'admins' __table_args__ = ( - ForeignKeyConstraint(['category_id'], ['categories.id'], ondelete='CASCADE', name='admins_category_id_fkey'), - ForeignKeyConstraint(['org_id'], ['organizations.id'], ondelete='CASCADE', name='admins_org_id_fkey'), ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE', name='admins_user_id_fkey'), - PrimaryKeyConstraint('user_id', 'org_id', name='admins_pkey') + ForeignKeyConstraint(['org_id'], ['organizations.id'], ondelete='CASCADE', name='admins_org_id_fkey'), + PrimaryKeyConstraint('user_id', 'org_id', name='admins_pkey'), ) + # Scope: + # - admin β†’ org_id NOT NULL + # - manager β†’ org_id NOT NULL + # - website_manager β†’ org_id IS NULL + user_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) - org_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + # Nullable ONLY for website_manager + org_id: Mapped[Optional[int]] = mapped_column(BigInteger, primary_key=True) + role: Mapped[str] = mapped_column(Text, nullable=False) # 'admin', 'manager', 'website_manager' created_at: Mapped[datetime.datetime] = mapped_column(DateTime(True), server_default=text('now()')) - role: Mapped[Optional[str]] = mapped_column(Text) - category_id: Mapped[Optional[int]] = mapped_column(BigInteger) - category: Mapped[Optional['Category']] = relationship('Category', back_populates='admins') - org: Mapped['Organization'] = relationship('Organization', back_populates='admins') user: Mapped['User'] = relationship('User', back_populates='admins') + org: Mapped[Optional['Organization']] = relationship('Organization', back_populates='admins') + + admin_categories: Mapped[List['AdminCategory']] = relationship( + 'AdminCategory', + back_populates='admin', + cascade='all, delete-orphan' + ) + +class AdminCategory(Base): + __tablename__ = 'admin_categories' + __table_args__ = ( + ForeignKeyConstraint( + ['user_id', 'org_id'], + ['admins.user_id', 'admins.org_id'], + ondelete='CASCADE', + name='admin_categories_admin_fkey' + ), + ForeignKeyConstraint( + ['category_id'], + ['categories.id'], + ondelete='CASCADE', + name='admin_categories_category_id_fkey' + ), + PrimaryKeyConstraint( + 'user_id', + 'org_id', + 'category_id', + name='admin_categories_pkey' + ), + ) + + user_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + org_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + category_id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + + created_at: Mapped[datetime.datetime] = mapped_column(DateTime(True), server_default=text('now()')) + + admin: Mapped['Admin'] = relationship('Admin', back_populates='admin_categories') + category: Mapped['Category'] = relationship('Category', back_populates='admin_categories') + + class Category(Base): @@ -218,7 +260,7 @@ class Category(Base): created_at: Mapped[datetime.datetime] = mapped_column(DateTime(True), server_default=text('now()')) org: Mapped['Organization'] = relationship('Organization', back_populates='categories') - admins: Mapped[List['Admin']] = relationship('Admin', back_populates='category') + admin_categories: Mapped[List['AdminCategory']] = relationship('AdminCategory', back_populates='category') events: Mapped[List['Event']] = relationship('Event', back_populates='category') schedule_categories: Mapped[List['ScheduleCategory']] = relationship('ScheduleCategory', back_populates='category') event_occurrences: Mapped[List['EventOccurrence']] = relationship('EventOccurrence', back_populates='category')