diff --git a/src/meshapi/management/commands/dedupe_history_entries.py b/src/meshapi/management/commands/dedupe_history_entries.py new file mode 100644 index 00000000..733057b4 --- /dev/null +++ b/src/meshapi/management/commands/dedupe_history_entries.py @@ -0,0 +1,82 @@ +import logging +from argparse import ArgumentParser +from typing import Any + +from django.core.management.base import BaseCommand +from django.db import connection, models, transaction + +from meshapi.models.devices.device import Device +from meshapi.models.link import Link +from meshapi.models.los import LOS + + +class Command(BaseCommand): + help = """Deletes duplicate history entries on history tables. + Jazzband has their own, but it is slow, and has the potential to corrupt our database. + (https://django-simple-history.readthedocs.io/en/stable/utils.html#clean-duplicate-history) + This script should be less likely to do that, but DO NOT use this without ensuring + we have a recent backup.""" + + def add_arguments(self, parser: ArgumentParser) -> None: + pass + + def handle(self, *args: Any, **options: Any) -> None: + self.deduplicate_history(Link.objects.all()) + self.deduplicate_history(Device.objects.all()) + self.deduplicate_history(LOS.objects.all()) + + def deduplicate_history(self, model_objects: models.QuerySet) -> None: + for m in model_objects: + history_model = m.history.model + table_name = history_model._meta.db_table + + # Delete history from each object. Atomic block makes it go faster + with transaction.atomic(): + deleted_records = 0 + try: + history = m.history.all() + # If there's no history, bail + if not history: + continue + # This is the history object that last changed something + # XXX (wdn): I don't think I have a type I can put in here + # without some robot yelling at me + meaningful_history: list[Any] = [] + for h in history: + # Grab the first history object we come across + if not meaningful_history: + meaningful_history.append(h) + continue + delta = meaningful_history[-1].diff_against( + h, foreign_keys_are_objs=False # This makes foreign keys show up as UUIDs + ) + # If there were fields that changed meaningfully, then + # track that by updating last_meaningful_history and + # keep going + if delta.changes or delta.changed_fields: + # logging.info(f"Preserving Change: {delta}") + meaningful_history.append(h) + continue + + # Otherwise, delete the history object (just don't preserve it) + # h.delete() + deleted_records += 1 + + if not deleted_records: + continue + + logging.info(f"Deleting {deleted_records} from {m.id}, {m}") + + # Nuke history for this object + with connection.cursor() as cursor: + query = f"DELETE FROM {table_name} WHERE id = '{m.id}'" + # logging.info(query) + cursor.execute(query) + + # Replace the history with meaningful history + for mh in meaningful_history: + mh.pk = None + mh.save() + + except Exception as e: + logging.exception(f"Could not get history for this link: {e}") diff --git a/src/meshapi/tests/test_dedupe_history_entries.py b/src/meshapi/tests/test_dedupe_history_entries.py new file mode 100644 index 00000000..3eeb43d7 --- /dev/null +++ b/src/meshapi/tests/test_dedupe_history_entries.py @@ -0,0 +1,95 @@ +from django.core import management +from django.test import TestCase + +from meshapi.models.devices.device import Device +from meshapi.models.link import Link +from meshapi.models.node import Node + + +class TestDedupeHistoryEntries(TestCase): + def setUp(self) -> None: + self.node1 = Node( + network_number=1234, + status=Node.NodeStatus.ACTIVE, + type=Node.NodeType.STANDARD, + latitude=0, + longitude=0, + ) + self.node1.save() + + self.node2 = Node( + network_number=5678, + status=Node.NodeStatus.ACTIVE, + type=Node.NodeType.STANDARD, + latitude=0, + longitude=0, + ) + self.node2.save() + + self.node3 = Node( + network_number=9012, + status=Node.NodeStatus.ACTIVE, + type=Node.NodeType.STANDARD, + latitude=0, + longitude=0, + ) + self.node3.save() + + self.device1 = Device( + node=self.node1, + status=Device.DeviceStatus.ACTIVE, + name="nycmesh-1234-dev1", + ) + self.device1.save() + + self.device2 = Device( + node=self.node2, + status=Device.DeviceStatus.ACTIVE, + name="nycmesh-5678-dev2", + ) + self.device2.save() + + # This is a genuine change + self.device2.name = "hi mom" + self.device2.save() + + self.device3 = Device( + node=self.node3, + status=Device.DeviceStatus.ACTIVE, + name="nycmesh-9012-dev3", + ) + # Save the device multiple times to see if we can get some bad history + self.device3.save() + self.device3.save() + self.device3.save() + + self.link = Link( + from_device=self.device1, + to_device=self.device2, + status=Link.LinkStatus.ACTIVE, + type=Link.LinkType.FIVE_GHZ, + uisp_id="fake-uisp-uuid", + ) + + # Save the link multiple times to see if we can get some bad history + self.link.save() + self.link.save() + self.link.save() + self.link.save() + + def test_deduplicate_history(self): + # Ensure link has 4 entries + self.assertEqual(4, len(self.link.history.all())) + + # Ensure device3 has 3 + self.assertEqual(3, len(self.device3.history.all())) + + # etc + self.assertEqual(2, len(self.device2.history.all())) + self.assertEqual(1, len(self.device1.history.all())) + + management.call_command("dedupe_history_entries") + self.assertEqual(1, len(self.link.history.all())) + self.assertEqual(1, len(self.device3.history.all())) + self.assertEqual(2, len(self.device2.history.all())) + self.assertEqual(1, len(self.device1.history.all()))