TIO-IKIM
diff --git a/‎README.md
+2-3 b/‎README.md
+2-3
diff --git a/‎configs/examples/preprocessing/patch_extraction/patch_extraction.yaml
+9-1 b/‎configs/examples/preprocessing/patch_extraction/patch_extraction.yaml
+9-1
diff --git a/‎configs/python/config.py
+11-3 b/‎configs/python/config.py
+11-3
diff --git a/‎docs/readmes/preprocessing.md
+21-4 b/‎docs/readmes/preprocessing.md
+21-4
diff --git a/‎preprocessing/patch_extraction/src/cli.py
+41-11 b/‎preprocessing/patch_extraction/src/cli.py
+41-11
diff --git a/‎preprocessing/patch_extraction/src/data/tissue_detector.pt
5.93 MB b/‎preprocessing/patch_extraction/src/data/tissue_detector.pt
5.93 MB
@@ -70,11 +70,10 @@ This repository contains the code implementation of CellViT, a deep learning-bas
 
 1. Clone the repository:
   `git clone https://github.com/TIO-IKIM/CellViT.git`
-2. Create a conda environment with Python 3.9.7 version and install conda requirements: `conda env create -f environment.yml`. You can change the environment name by editing the `name` tag in the environment.yaml file.
+2. Create a conda environment with Python 3.10.12 and install the conda requirements: `conda env create -f environment.yml`. You can change the environment name by editing the `name` tag in the `environment.yml` file.
 This step is necessary, as we need to install `Openslide` with binary files. This is easier with conda. Otherwise, installation from [source](https://openslide.org/api/python/) needs to be performed and packages installed with pi
 3. Activate environment: `conda activate cellvit_env`
-4. Install torch for for system, as described [here](https://pytorch.org/get-started/locally/). Preferred version is 1.13, see [optional_dependencies](./optional_dependencies.txt) for help. You can find all version here: https://pytorch.org/get-started/previous-versions/
-Example for CUDA 11.7: `pip install torch==1.13.0+cu117 torchvision==0.14.0+cu117 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu117`
+4. Install torch (>=2.0) for your system, as described [here](https://pytorch.org/get-started/locally/). The preferred version is 2.0; see [optional_dependencies](./optional_dependencies.txt) for help. You can find all versions here: https://pytorch.org/get-started/previous-versions/
 
 5. Install optional dependencies `pip install -r optional_dependencies.txt` to get a speedup using [NVIDIA-Clara](https://www.nvidia.com/de-de/clara/) and [CuCIM](https://github.com/rapidsai/cucim) for preprocessing during inference. Please select your CUDA versions. Help for installing cucim can be found [online](https://github.com/rapidsai/cucim).
 **Note Error: cannot import name CuImage from cucim**
 
@@ -14,9 +14,14 @@ patch_overlap:                # The percentage amount pixels that should overlap
 downsample:                   # Each WSI level is downsampled by a factor of 2, downsample
                               # expresses which kind of downsampling should be used with
                               # respect to the highest possible resolution. [int][Optional, defaults to 0]
+target_mpp:                   # If this parameter is provided, the output level of the WSI
+                              # corresponds to the level that is at the target microns per pixel of the WSI.
+                              # Alternative to target_mag, downsample and level. Highest priority, overwrites all other setups for magnification, downsample, or level.
+                              # [float][Optional, defaults to None]
 target_mag:                   # If this parameter is provided, the output level of the WSI
                               # corresponds to the level that is at the target magnification of the WSI.
-                              # Alternative to downsaple and level. [int][Optional, defaults to None]
+                              # Alternative to target_mpp, downsample and level. High priority (only target_mpp is higher); overwrites downsample and level if provided.
+                              # [float][Optional, defaults to None]
 level:                        # The tile level for sampling, alternative to downsample. [int][Optional, defaults to None]
 context_scales:               # Define context scales for context patches. Context patches are centered around a central patch.
                               # The context-patch size is equal to the patch-size, but downsampling is different.
@@ -56,8 +61,11 @@ tissue_annotation:            # Can be used to name a polygon annotation to dete
 masked_otsu:                  # Use annotation to mask the thumbnail before otsu-thresholding is used. [bool][Optional, defaults to False]
 otsu_annotation:              # Can be used to name a polygon annotation to determine the area
                               # for masked otsu thresholding. [List][Optional, defaults to None]
+filter_patches:               # Post-extraction patch filtering to sort out artefacts, marker and other non-tissue patches with a DL model. Time consuming.
+                              # [bool] [Optional, defaults to False]
 
 # logging
 log_path:                     # Path where log files should be stored. Otherwise, log files are stored in the output folder. [str][Optional, defaults to None]
 log_level:                    # Set the logging level. [str][Optional, defaults to info]
 hardware_selection:           # Select hardware device (just if available, otherwise always cucim). [str] [Optional, defaults to cucim]
+wsi_properties:               # Dictionary with manual WSI metadata. Required keys are: ... TODO: add keys [dict] [Optional, default selection from files]
@@ -7,10 +7,18 @@
 
 from typing import List
 
-WSI_EXT: List[str] = ["svs"]
-ANNOTATION_EXT: List[str] = ["json", "xml"]
+WSI_EXT: List[str] = [
+    "svs",
+    "tiff",
+    "tif",
+    "bif",
+    "scn",
+    "ndpi",
+    "vms",
+    "vmu",
+]  # mirax not tested yet
+ANNOTATION_EXT: List[str] = ["json"]
 LOGGING_EXT: List[str] = ["critical", "error", "warning", "info", "debug"]
-
 BACKBONES: List[str] = ["ResNet50", "ResNet50Bottleneck", "ResNet18", "ResNet34"]
 
 # Currently: 30 Colors
 
@@ -1,18 +1,21 @@
 # Preprocessing
 
+In our preprocessing pipeline, we are able to extract square patches from detected tissue areas, load annotation files (`.json`) and apply color normalizations. We make use of the popular [OpenSlide](https://openslide.org/) library, but extended it with the [RAPIDS cuCIM](https://github.com/rapidsai/cucim) framework for a speedup in patch extraction.
+
 The CLI of the main script for patch extraction ([main_extraction](preprocessing/main_extraction.py)) is as follows:
+
 ```bash
 python3 main_extraction.py [-h]
-usage: main_extraction.py [-h]
                           [--wsi_paths WSI_PATHS]
                           [--wsi_filelist WSI_FILELIST]
                           [--output_path OUTPUT_PATH]
                           [--wsi_extension {svs}]
                           [--config CONFIG]
                           [--patch_size PATCH_SIZE]
                           [--patch_overlap PATCH_OVERLAP]
-                          [--downsample DOWNSAMPLE]
+                          [--target_mpp TARGET_MPP]
                           [--target_mag TARGET_MAG]
+                          [--downsample DOWNSAMPLE]
                           [--level LEVEL]
                           [--context_scales [CONTEXT_SCALES ...]]
                           [--check_resolution CHECK_RESOLUTION]
@@ -32,9 +35,11 @@ usage: main_extraction.py [-h]
                           [--tissue_annotation TISSUE_ANNOTATION]
                           [--masked_otsu]
                           [--otsu_annotation OTSU_ANNOTATION]
+                          [--filter_patches]
                           [--log_path LOG_PATH]
                           [--log_level {critical,error,warning,info,debug}]
                           [--hardware_selection {cucim,openslide}]
+                          [--wsi_properties WSI_PROPERTIES]
 
 optional arguments:
   -h, --help            show this help message and exit
@@ -62,10 +67,16 @@ optional arguments:
                         downsampling should be used with respect to the highest possible resolution. Medium
                         priority, gets overwritten by target_mag if provided, but overwrites level. (default:
                         None)
+  --target_mpp TARGET_MPP
+                        If this parameter is provided, the output level of the WSI corresponds to the level that
+                        is at the target microns per pixel of the WSI. Alternative to target_mag, downsample and level.
+                        Highest priority,
+                        overwrites target_mag, downsample and level if provided. (default: None)
   --target_mag TARGET_MAG
                         If this parameter is provided, the output level of the WSI corresponds to the level that
-                        is at the target magnification of the WSI. Alternative to downsaple and level. Highest
-                        priority, overwrites downsample and level if provided. (default: None)
+                        is at the target magnification of the WSI. Alternative to target_mpp, downsample and level.
+                        High priority, just target_mpp has a higher priority,
+                        overwrites downsample and level if provided. (default: None)
   --level LEVEL         The tile level for sampling, alternative to downsample. Lowest priority, gets overwritten
                         by target_mag and downsample if they are provided. (default: None)
   --context_scales [CONTEXT_SCALES ...]
@@ -112,13 +123,19 @@ optional arguments:
   --otsu_annotation OTSU_ANNOTATION
                         Can be used to name a polygon annotation to determine the area for masked otsu
                         thresholding. Seperate multiple labels with ' ' (whitespace) (default: None)
+  --filter_patches
+                        Post-extraction patch filtering to sort out artefacts, marker and other non-tissue patches with a DL model. Time consuming. Defaults to False.
+                        (default: False)
   --log_path LOG_PATH   Path where log files should be stored. Otherwise, log files are stored in the output
                         folder (default: None)
   --log_level {critical,error,warning,info,debug}
                         Set the logging level. Options are ['critical', 'error', 'warning', 'info', 'debug']
                         (default: None)
   --hardware_selection {cucim,openslide}
                         Select hardware device (just if available, otherwise always cucim). Defaults to cucim.)
+  --wsi_properties WSI_PROPERTIES
+                        Can be used to pass the wsi properties manually
+                        (default: None)
 ```
 
 **Label-Map**:
 
@@ -33,8 +33,9 @@ class PreProcessingYamlConfig(BaseModel):
     # basic setups
     patch_size: Optional[int]
     patch_overlap: Optional[float]
-    downsample: Optional[int]
+    target_mpp: Optional[float]
     target_mag: Optional[float]
+    downsample: Optional[int]
     level: Optional[int]
     context_scales: Optional[List[int]]
     check_resolution: Optional[float]
@@ -62,11 +63,13 @@ class PreProcessingYamlConfig(BaseModel):
     tissue_annotation: Optional[str]
     masked_otsu: Optional[bool]
     otsu_annotation: Optional[str]
+    filter_patches: Optional[bool]
 
     # other
     log_path: Optional[str]
     log_level: Optional[str]
     hardware_selection: Optional[str]
+    wsi_properties: Optional[dict]
 
 
 class PreProcessingConfig(BaseModel):
@@ -84,12 +87,15 @@ class PreProcessingConfig(BaseModel):
         patch_overlap (float, optional): The percentage amount pixels that should overlap between two different patches.
             Please Provide as integer between 0 and 100, indicating overlap in percentage.
             Defaults to 0.
+        target_mpp (float, optional): If this parameter is provided, the output level of the WSI
+            corresponds to the level that is at the target microns per pixel of the WSI.
+            Alternative to target_mag, downsample and level. Highest priority, overwrites all other setups for magnification, downsample, or level. Defaults to None.
+        target_mag (float, optional): If this parameter is provided, the output level of the WSI
+            corresponds to the level that is at the target magnification of the WSI.
+            Alternative to target_mpp, downsample and level. High priority (only target_mpp is higher); overwrites downsample and level if provided. Defaults to None.
         downsample (int, optional): Each WSI level is downsampled by a factor of 2, downsample
             expresses which kind of downsampling should be used with
             respect to the highest possible resolution. Defaults to 0.
-        target_mag (float, optional): If this parameter is provided, the output level of the WSI
-            corresponds to the level that is at the target magnification of the WSI.
-            Alternative to downsaple and level. Defaults to None.
         level (int, optional): The tile level for sampling, alternative to downsample. Defaults to None.
         context_scales ([List[int], optional): Define context scales for context patches. Context patches are centered around a central patch.
             The context-patch size is equal to the patch-size, but downsampling is different.
@@ -125,9 +131,12 @@ class PreProcessingConfig(BaseModel):
         masked_otsu (bool, optional): Use annotation to mask the thumbnail before otsu-thresholding is used. Defaults to False.
         otsu_annotation (bool, optional): Can be used to name a polygon annotation to determine the area
             for masked otsu thresholding. Seperate multiple labels with ' ' (whitespace). Defaults to None.
+        filter_patches (bool, optional): Post-extraction patch filtering to sort out artefacts, marker and other non-tissue patches with a DL model. Time consuming.
+            Defaults to False.
         log_path (str, optional): Path where log files should be stored. Otherwise, log files are stored in the output folder. Defaults to None.
         log_level (str, optional): Set the logging level. Defaults to "info".
         hardware_selection (str, optional): Select hardware device (just if available, otherwise always cucim). Defaults to "cucim".
+        wsi_properties (dict, optional): Dictionary with manual WSI metadata. Required keys are: ... TODO: add keys
 
     Raises:
         ValueError: Patch-size must be positive
@@ -150,6 +159,7 @@ class PreProcessingConfig(BaseModel):
     patch_size: Optional[int] = 256
     patch_overlap: Optional[float] = 0
     downsample: Optional[int] = 1
+    target_mpp: Optional[float]
     target_mag: Optional[float]
     level: Optional[int]
     context_scales: Optional[List[int]]
@@ -178,11 +188,13 @@ class PreProcessingConfig(BaseModel):
     tissue_annotation: Optional[str]
     masked_otsu: Optional[bool] = False
     otsu_annotation: Optional[str]
+    filter_patches: Optional[bool] = False
 
     # other
     log_path: Optional[str]
     log_level: Optional[str] = "info"
     hardware_selection: Optional[str] = "cucim"
+    wsi_properties: Optional[dict]
 
     def __init__(__pydantic_self__, **data: Any) -> None:
         super().__init__(**data)
@@ -340,19 +352,26 @@ def __init__(self) -> None:
             "Please Provide as integer between 0 and 100, indicating overlap in percentage.",
         )
         parser.add_argument(
-            "--downsample",
-            type=int,
-            help="Each WSI level is downsampled by a factor of 2, downsample "
-            "expresses which kind of downsampling should be used with "
-            "respect to the highest possible resolution. Medium priority, gets overwritten by target_mag if provided, "
-            "but overwrites level.",
+            "--target_mpp",
+            type=float,
+            help="If this parameter is provided, the output level of the WSI "
+            "corresponds to the level that is at the target microns per pixel of the WSI. "
+            "Alternative to target_mag, downsaple and level. Highest priority, overwrites all other setups for magnifcation, downsample, or level.",
         )
         parser.add_argument(
             "--target_mag",
             type=float,
             help="If this parameter is provided, the output level of the WSI "
             "corresponds to the level that is at the target magnification of the WSI. "
-            "Alternative to downsaple and level. Highest priority, overwrites downsample and level if provided.",
+            "Alternative to target_mpp, downsaple and level. High priority, just target_mpp has a higher priority, overwrites downsample and level if provided.",
+        )
+        parser.add_argument(
+            "--downsample",
+            type=int,
+            help="Each WSI level is downsampled by a factor of 2, downsample "
+            "expresses which kind of downsampling should be used with "
+            "respect to the highest possible resolution. Medium priority, gets overwritten by target_mag and target_mpp if provided, "
+            "but overwrites level.",
         )
         parser.add_argument(
             "--level",
@@ -485,6 +504,12 @@ def __init__(self) -> None:
             help="Can be used to name a polygon annotation to determine the area "
             "for masked otsu thresholding. Seperate multiple labels with ' ' (whitespace)",
         )
+        parser.add_argument(
+            "--filter_patches",
+            action="store_true",
+            default=None,
+            help="Post-extraction patch filtering to sort out artefacts, marker and other non-tissue patches with a DL model. Time consuming. Defaults to False.",
+        )
 
         # other
         parser.add_argument(
@@ -504,6 +529,11 @@ def __init__(self) -> None:
             choices=["cucim", "openslide"],
             help="Select hardware device (just if available, otherwise always cucim). Defaults to cucim.",
         )
+        parser.add_argument(
+            "--wsi_properties",
+            type=dict,
+            help="Can be used to pass the wsi properties manually",
+        )
 
         self.parser = parser