From 8a90ec1bb8af01ea7bd4bd097760c19193891e8b Mon Sep 17 00:00:00 2001 From: Paul Choisel Date: Fri, 11 Jul 2025 17:59:57 +0200 Subject: [PATCH] Fix private tag anonymization when using ranges --- dicomanonymizer/simpledicomanonymizer.py | 11 ++- tests/test_anon.py | 96 +++++++++++++++++++----- 2 files changed, 85 insertions(+), 22 deletions(-) diff --git a/dicomanonymizer/simpledicomanonymizer.py b/dicomanonymizer/simpledicomanonymizer.py index de33a89..0fd4c5a 100644 --- a/dicomanonymizer/simpledicomanonymizer.py +++ b/dicomanonymizer/simpledicomanonymizer.py @@ -477,10 +477,13 @@ def anonymize_dataset( def range_callback(dataset, data_element): if ( - data_element.tag.group & tag[2] == tag[0] - and data_element.tag.element & tag[3] == tag[1] + data_element.tag.group & tag[2] == tag[0] & tag[2] + and data_element.tag.element & tag[3] == tag[1] & tag[3] ): - action(dataset, (data_element.tag.group, data_element.tag.element)) + tag_tuple = (data_element.tag.group, data_element.tag.element) + action(dataset, tag_tuple) + if dataset.get(tag_tuple) and data_element.is_private: + private_tags.append(get_private_tag(dataset, tag_tuple)) element = None @@ -499,10 +502,12 @@ def range_callback(dataset, data_element): action(dataset.file_meta, tag) else: action(dataset, tag) + try: element = dataset.get(tag) except KeyError: print("Cannot get element from tag: ", tag_to_hex_strings(tag)) + continue # Get private tag to restore it later if element and element.tag.is_private: diff --git a/tests/test_anon.py b/tests/test_anon.py index 97b05f1..fb80ded 100644 --- a/tests/test_anon.py +++ b/tests/test_anon.py @@ -1,4 +1,5 @@ import copy +import pydicom import pytest import warnings @@ -8,7 +9,7 @@ from pydicom.config import settings, IGNORE from pydicom.data import get_testdata_files -from dicomanonymizer.simpledicomanonymizer import anonymize_dataset +from dicomanonymizer.simpledicomanonymizer import anonymize_dataset, keep from dicomanonymizer.dicom_anonymization_databases import dicomfields_2023 # Ignore warnings from pydicom validation @@ -50,6 +51,34 @@ def orig_anon_dataset(request): return (orig_ds, anon_ds) +@pytest.fixture +def dataset_with_private_fields(): + fields = [ + { + "id": (0x2001, 0x0010), + "type": "LO", + "value": "Tag creator", + }, + { # Replaced with empty value + "id": (0x2001, 0x1001), + "type": "FL", + "value": 0, + }, + { # Replaced with empty value + "id": (0x2001, 0x0011), + "type": "LO", + "value": "Tag creator 2", + }, + ] + + data = pydicom.Dataset() + + for field in fields: # sourcery skip: no-loop-in-tests + data.add_new(field["id"], field["type"], field["value"]) + + return data + + def test_deleted_tags_are_removed(orig_anon_dataset): orig_ds, anon_ds = orig_anon_dataset deleted_tags = dicomfields_2023.X_TAGS @@ -61,9 +90,9 @@ def test_deleted_tags_are_removed(orig_anon_dataset): # TODO: Investigate why Date type are replaced instead of deleted # See issue https://github.com/KitwareMedical/dicom-anonymizer/issues/56 if orig_ds[tt].VR != "DA": # sourcery skip: no-conditionals-in-tests - assert tt not in anon_ds, ( - f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" - ) + assert ( + tt not in anon_ds + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->{anon_ds[tt].value}" changed_tags = ( @@ -84,9 +113,9 @@ def is_elem_replaced(orig, anon) -> bool: for tt in changed_tags: if tt in x and len(x[tt].value) > 0: assert tt in y, f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value}->missing!" - assert is_elem_replaced(x[tt], y[tt]), ( - f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced" - ) + assert is_elem_replaced( + x[tt], y[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} not replaced" return True return orig.value != anon.value if orig.value not in empty_values else True @@ -97,12 +126,12 @@ def test_changed_tags_are_replaced(orig_anon_dataset): for tt in changed_tags: # sourcery skip: no-loop-in-tests if tt in orig_ds: # sourcery skip: no-conditionals-in-tests - assert tt in anon_ds, ( - f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" - ) - assert is_elem_replaced(orig_ds[tt], anon_ds[tt]), ( - f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced" - ) + assert ( + tt in anon_ds + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value}->missing!" + assert is_elem_replaced( + orig_ds[tt], anon_ds[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{orig_ds[tt].value} not replaced" empty_tags = dicomfields_2023.Z_TAGS + dicomfields_2023.X_Z_TAGS @@ -113,9 +142,9 @@ def is_elem_empty(elem) -> bool: for x in elem.value: for tt in empty_tags: if tt in x and len(x[tt].value) > 0: - assert is_elem_empty(x[tt]), ( - f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" - ) + assert is_elem_empty( + x[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{x[tt].value} is not empty" return True return elem.value in empty_values @@ -128,6 +157,35 @@ def test_empty_tags_are_emptied(orig_anon_dataset): if ( tt in orig_ds and len(orig_ds[tt].value) > 0 ): # sourcery skip: no-conditionals-in-tests - assert is_elem_empty(anon_ds[tt]), ( - f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" - ) + assert is_elem_empty( + anon_ds[tt] + ), f"({tt[0]:04X},{tt[1]:04x}):{anon_ds[tt].value} is not empty" + + +def test_private_tags_are_removed(dataset_with_private_fields): + anonymize_dataset(dataset_with_private_fields) + + assert (0x2001, 0x0010) not in dataset_with_private_fields + assert (0x2001, 0x1001) not in dataset_with_private_fields + + +def test_private_tags_can_be_kept(dataset_with_private_fields): + anonymize_dataset( + dataset_with_private_fields, + extra_anonymization_rules={(0x2001, 0x0010): keep, (0x2001, 0x1001): keep}, + ) + + assert (0x2001, 0x0010) in dataset_with_private_fields + assert (0x2001, 0x1001) in dataset_with_private_fields + assert (0x2001, 0x0011) not in dataset_with_private_fields + + +def test_private_tags_can_be_kept_with_range(dataset_with_private_fields): + anonymize_dataset( + dataset_with_private_fields, + extra_anonymization_rules={ + (0x2001, 0x1001, 0xFFFF, 0xFF00): keep, + }, + ) + + assert (0x2001, 0x1001) in dataset_with_private_fields