fix(aci milestone 3): clean up orphaned objects from workflow engine #90796

Merged · 4 commits · May 2, 2025
migrations_lockfile.txt (1 addition, 1 deletion)
@@ -29,4 +29,4 @@ tempest: 0002_make_message_type_nullable

uptime: 0037_fix_drift_default_to_db_default

- workflow_engine: 0053_add_legacy_rule_indices
+ workflow_engine: 0054_clean_up_orphaned_metric_alert_objects
0054_clean_up_orphaned_metric_alert_objects.py (new file)
@@ -0,0 +1,76 @@
# Generated by Django 5.1.7 on 2025-05-01 20:38

import logging

from django.db import migrations, router, transaction
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Exists, OuterRef

from sentry.new_migrations.migrations import CheckedMigration

logger = logging.getLogger(__name__)


def delete_orphaned_migrated_metric_alert_objects(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    Action = apps.get_model("workflow_engine", "Action")
    DataConditionGroup = apps.get_model("workflow_engine", "DataConditionGroup")
    DataConditionGroupAction = apps.get_model("workflow_engine", "DataConditionGroupAction")
    Detector = apps.get_model("workflow_engine", "Detector")
    Workflow = apps.get_model("workflow_engine", "Workflow")
    WorkflowDataConditionGroup = apps.get_model("workflow_engine", "WorkflowDataConditionGroup")

    # A DataConditionGroup is orphaned when nothing references it: no Detector,
    # no WorkflowDataConditionGroup, and no Workflow. Each ~Exists(...) compiles
    # to a correlated NOT EXISTS subquery on the group's id.
    orphaned_dcgs = (
        DataConditionGroup.objects.filter(
            ~Exists(Detector.objects.filter(workflow_condition_group_id=OuterRef("id")))
        )
        .filter(
            ~Exists(WorkflowDataConditionGroup.objects.filter(condition_group_id=OuterRef("id")))
        )
        .filter(~Exists(Workflow.objects.filter(when_condition_group_id=OuterRef("id"))))
    )

    # Actions reachable through an orphaned group, via the
    # DataConditionGroupAction join table.
    orphaned_action_ids = DataConditionGroupAction.objects.filter(
        Exists(orphaned_dcgs.filter(id=OuterRef("condition_group_id")))
    ).values_list("action__id", flat=True)

    orphaned_actions = Action.objects.filter(id__in=orphaned_action_ids)

    logger.info("orphaned action count: %s", orphaned_actions.count())
    logger.info("orphaned dcg count: %s", orphaned_dcgs.count())

    # Delete the orphaned actions, then the orphaned groups, in one transaction.
    with transaction.atomic(router.db_for_write(Action)):
        for action in orphaned_actions:
            action.delete()
        for dcg in orphaned_dcgs:
            dcg.delete()
Review comment (Member):

I'd also recommend not wrapping this entire operation in a transaction, because all involved rows will be locked for the whole transaction. Maybe it doesn't matter too much since they're orphaned, but better to be safe.

If you want to make sure the rows related to each other are in a transaction, that's fine; you'll just need to associate them and have a smaller transaction for each set of related rows. I think it's probably safe enough to just skip it, though.
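
For illustration only (not part of this PR), a minimal sketch of the per-set transactions the reviewer describes, reusing the model handles from the migration function above:

# Sketch: one short transaction per orphaned group, so row locks are held
# only while that group's related rows are deleted.
for dcg_id in list(orphaned_dcgs.values_list("id", flat=True)):
    with transaction.atomic(router.db_for_write(DataConditionGroup)):
        action_ids = list(
            DataConditionGroupAction.objects.filter(
                condition_group_id=dcg_id
            ).values_list("action_id", flat=True)
        )
        Action.objects.filter(id__in=action_ids).delete()
        DataConditionGroup.objects.filter(id=dcg_id).delete()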



class Migration(CheckedMigration):
    # This flag is used to mark that a migration shouldn't be automatically run in production.
    # This should only be used for operations where it's safe to run the migration after your
    # code has deployed. So this should not be used for most operations that alter the schema
    # of a table.
    # Here are some things that make sense to mark as post deployment:
    # - Large data migrations. Typically we want these to be run manually so that they can be
    #   monitored and not block the deploy for a long period of time while they run.
    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
    #   run this outside deployments so that we don't block them. Note that while adding an index
    #   is a schema change, it's completely safe to run the operation after the code has deployed.
    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment

    is_post_deployment = True
Review comment (Contributor, Author):

Marked post-deploy, but it probably doesn't have to be.


    dependencies = [
        ("workflow_engine", "0053_add_legacy_rule_indices"),
    ]

    operations = [
        migrations.RunPython(
            code=delete_orphaned_migrated_metric_alert_objects,
            reverse_code=migrations.RunPython.noop,
            hints={"tables": ["workflow_engine.DataConditionGroup"]},
        ),
    ]
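
Side note: the orphan queryset can be inspected read-only before the deletion runs. A minimal sketch, assuming a Django shell where these models are importable from sentry.workflow_engine.models (this snippet is not part of the PR):

from django.db.models import Exists, OuterRef

from sentry.workflow_engine.models import (
    DataConditionGroup,
    Detector,
    Workflow,
    WorkflowDataConditionGroup,
)

# Same anti-join as the migration: keep only groups referenced by nothing.
orphaned = (
    DataConditionGroup.objects.filter(
        ~Exists(Detector.objects.filter(workflow_condition_group_id=OuterRef("id")))
    )
    .filter(~Exists(WorkflowDataConditionGroup.objects.filter(condition_group_id=OuterRef("id"))))
    .filter(~Exists(Workflow.objects.filter(when_condition_group_id=OuterRef("id"))))
)

print(orphaned.count())     # number of groups the migration would delete
print(str(orphaned.query))  # the compiled SQL with correlated NOT EXISTS clauses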
New test file:
@@ -0,0 +1,54 @@
from sentry.testutils.cases import TestMigrations
from sentry.workflow_engine.migration_helpers.alert_rule import dual_write_alert_rule
from sentry.workflow_engine.models import (
    Action,
    ActionAlertRuleTriggerAction,
    AlertRuleDetector,
    AlertRuleWorkflow,
    DataCondition,
    DataConditionGroup,
    DataConditionGroupAction,
)


class TestCleanUpOrphanedMetricAlertObjects(TestMigrations):
Review comment (Member):

Let's comment this out now that it has proven the migration runs fine, just to make sure CI doesn't end up breaking. (One way to do that is sketched after the test below.)

app = "workflow_engine"
migrate_from = "0053_add_legacy_rule_indices"
migrate_to = "0054_clean_up_orphaned_metric_alert_objects"

def setup_before_migration(self, apps):
self.alert_rule = self.create_alert_rule(name="hojicha")
self.trigger = self.create_alert_rule_trigger(alert_rule=self.alert_rule)
self.action = self.create_alert_rule_trigger_action(alert_rule_trigger=self.trigger)

dual_write_alert_rule(self.alert_rule)

detector = AlertRuleDetector.objects.get(alert_rule_id=self.alert_rule.id).detector
workflow = AlertRuleWorkflow.objects.get(alert_rule_id=self.alert_rule.id).workflow

detector.delete()
workflow.delete()

assert (
ActionAlertRuleTriggerAction.objects.filter(
alert_rule_trigger_action_id=self.action.id
).count()
== 1
)
assert Action.objects.count() == 1
assert DataConditionGroupAction.objects.count() == 1
# For each dual write attempt: one condition group on the detector, one action filter connected to the workflow
assert DataConditionGroup.objects.count() == 2
assert DataCondition.objects.count() == 3 # 2 detector triggers and one action filter DC

def test(self):
assert DataConditionGroup.objects.count() == 0
assert DataCondition.objects.count() == 0
assert (
ActionAlertRuleTriggerAction.objects.filter(
alert_rule_trigger_action_id=self.action.id
).count()
== 0
)
assert Action.objects.count() == 0
assert DataConditionGroupAction.objects.count() == 0
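
On the review comment above: if the goal is just to keep CI from re-running a migration test that can no longer start from 0053, a skip marker is one alternative to commenting the class out. A hedged sketch, assuming pytest is the test runner (the reason string is hypothetical):

import pytest

from sentry.testutils.cases import TestMigrations


@pytest.mark.skip(reason="migration 0054 has landed; kept for reference only")
class TestCleanUpOrphanedMetricAlertObjects(TestMigrations):
    ...  # body unchanged from above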