DynamicGenetics · leriomaggio · Mar 2, 2022 · Mar 2, 2022 · Mar 2, 2022 · Mar 2, 2022
diff --git a/.gitignore b/.gitignore
@@ -26,6 +26,12 @@ notebooks/FER/
 notebooks/fer2013
 notebooks/*.tar.gz
 
+# FER+ local dataset
+FERPlus/*
+backend/datasets/FERPlus/*
+notebooks/FERPlus/
+notebooks/fer2013new
+
 
 # Python env (Codespaces)
 pythonenv3.8
@@ -158,3 +164,9 @@ dmypy.json
 
 # PyCharm project settings
 .idea
+/notebooks/models/umap_ferplus_test.joblib
+/notebooks/models/umap_ferplus_train.joblib
+/notebooks/models/umap_ferplus_valid.joblib
+/notebooks/models/umap_test.joblib
+/notebooks/models/umap_train.joblib
+/notebooks/models/umap_valid.joblib
diff --git a/backend/datasets/__init__.py b/backend/datasets/__init__.py
@@ -4,16 +4,26 @@
 
 from .fer import FER
 from .sources import load_fer_dataset_lazy, load_fer_training_lazy
+from .sources import load_fer_validation_lazy, load_fer_evaluation_ds_lazy
+from .sources import load_ferplus_dataset_lazy, load_ferplus_evaluation_ds_lazy
 from .sources import DataSource, Sample
 
 
 # Available Dataset Keys
 FER_DATASET = "FER"
+FERPLUS_DATASET = "FER+"
+FERPLUS_METRICS = "FER+_METRICS"
 FER_TRAINING = "FER_TRAIN"
+FER_VALIDATION = "FER_VALIDATION"
+FER_METRICS = "FER_METRICS"
 
+# Registry mapping each dataset key to the object returned by its loader.
+# NOTE: every loader below is called once, while this dict literal is evaluated.
 DATASETS_PROXY = {
     FER_DATASET: load_fer_dataset_lazy(),
     FER_TRAINING: load_fer_training_lazy(),
+    FER_VALIDATION: load_fer_validation_lazy(),
+    FER_METRICS: load_fer_evaluation_ds_lazy(),
+    FERPLUS_DATASET: load_ferplus_dataset_lazy(),
+    FERPLUS_METRICS: load_ferplus_evaluation_ds_lazy(),
 }
 
 
@@ -49,6 +59,11 @@ def get_dataset(key: str) -> DataSource:
     "FER",
     "DataSource",
     "FER_DATASET",
+    "FER_METRICS",
+    "FER_VALIDATION",
+    "FER_TRAINING",
+    "FERPLUS_METRICS",
+    "FERPLUS_DATASET",
     "DATASETS_PROXY",
     "get_dataset",
     "Sample",

diff --git a/backend/datasets/fer.py b/backend/datasets/fer.py
@@ -219,9 +219,8 @@ def class_to_idx(self):
     def idx_to_class(self):
         return {v: k for k, v in self.class_to_idx.items()}
 
-    @staticmethod
-    def classes_map():
-        return {i: c for i, c in enumerate(FER.classes)}
+    @classmethod
+    def classes_map(cls):
+        """Return a dict mapping each class index to its class name.
+
+        Declared as a classmethod so that it can still be called on the class
+        itself (as the former staticmethod could) as well as on an instance,
+        while reading ``classes`` from the class it is invoked on, so that a
+        subclass overriding ``classes`` gets its own mapping.
+        """
+        return dict(enumerate(cls.classes))
 
     def _check_exists(self):
         for data_fname in self.data_files.values():
@@ -269,27 +268,30 @@ def download(self):
                 url, download_root=str(self.raw_folder), filename=filename, md5=md5
             )
 
-        # process and save as torch files
-        def _set_partition(label: str) -> str:
-            if label == "Training":
-                return Partition.train.value
-            if label == "PrivateTest":
-                return Partition.validation.value
-            return Partition.test.value
-
         print("Processing...", end="")
+        self._process_partitions()
+        print("Done!")
+
+    def _process_partitions(self):
+        """Split the raw CSV by partition and save each split as a torch file.
+
+        Reads the raw data file, tags every row with a partition derived from
+        its ``Usage`` value, and stores one ``(images, labels)`` tuple per
+        ``Partition`` member in the processed folder.
+        """
         raw_data_filepath = self.raw_folder / self.RAW_DATA_FOLDER / self.RAW_DATA_FILE
         raw_df = pd.read_csv(raw_data_filepath)
-        raw_df["data_partition"] = raw_df.Usage.apply(_set_partition)
-
+        raw_df["data_partition"] = raw_df.Usage.apply(self._set_partition)
         for partition in Partition:
             dataset = raw_df[raw_df["data_partition"] == partition.value]
             images = self._images_as_torch_tensors(dataset)
             labels = self._labels_as_torch_tensors(dataset)
             data_file = self.processed_folder / self.data_files[partition]
             with open(data_file, "wb") as f:
                 torch.save((images, labels), f)
-        print("Done!")
+
+    @staticmethod
+    def _set_partition(label: str) -> str:
+        """Map a raw ``Usage`` label to the value of the matching partition.
+
+        ``"Training"`` selects the train partition, ``"PrivateTest"`` the
+        validation partition, and any other label the test partition.
+        """
+        if label == "Training":
+            partition = Partition.train
+        elif label == "PrivateTest":
+            partition = Partition.validation
+        else:
+            partition = Partition.test
+        return partition.value
 
     def _images_as_torch_tensors(self, dataset: pd.DataFrame) -> torch.Tensor:
         """

diff --git a/backend/datasets/ferplus.py b/backend/datasets/ferplus.py
@@ -0,0 +1,156 @@
+"""
+This module provides access to the FER+ (Facial Emotion Recognition Plus) dataset,
+encapsulated as a `torchvision.datasets.VisionDataset` class.
+
+Notes
+-----
+The FER+ annotations provide a set of new labels for the standard FER dataset.
+In FER+, each image has been labeled by 10 crowd-sourced taggers, which provide
+better quality ground truth for each image/emotion than the original FER labels.
+
+Having 10 taggers for each image enables researchers to estimate an emotion
+probability distribution per face.
+This allows constructing algorithms that produce statistical distributions or
+multi-label outputs instead of the conventional single-label output,
+as described in [1]_
+
+The new label file is named `fer2013new.csv` and contains the same number of rows
+as the original `fer2013.csv` label file, in the same order,
+so that you can infer which emotion tag belongs to which image.
+
+The format of the CSV file is as follows:
+```
+usage, neutral, happiness, surprise, sadness, anger, disgust, fear, contempt, unknown, NF
+```
+Column "usage" is the same as in the original FER labels, and differentiates between
+the _Training_, _Public test_, and _Private test_ (validation) sets.
+
+The other columns are the **vote count** for each emotion with the addition of
+`unknown` and `NF` (i.e. _Not a Face_).
+
+
+References
+-----------
+.. [1]  Emad Barsoum and Cha Zhang and Cristian Canton Ferrer and Zhengyou Zhang.
+   "Training Deep Networks for Facial Expression Recognition with Crowd-Sourced
+   Label Distribution". ICMI '16: Proceedings of the 18th ACM International
+   Conference on Multimodal Interaction, October 2016, Pages 279–283
+   https://doi.org/10.1145/2993148.2993165
+"""
+from typing import List, Union
+from typing import Any, Optional, Callable
+from pathlib import Path
+
+import pandas as pd
+import numpy as np
+import torch as th
+
+from .fer import FER, Partition
+
+
+class FERPlus(FER):
+
+    NEW_LABELS_DATA_FILE = "fer2013new.csv"
+    RAW_DATA_FOLDER = "fer2013new"
+
+    resources = [
+        (
+            "https://www.dropbox.com/s/659oxqg0osbozmj/fer2013new.tar.gz?dl=1",
+            "338fafaa116322c4d7ecd24a65d014bf",
+        )
+    ]
+
+    # NOTE: These are NOT the original FER+ class names (they are aligned with FER!),
+    # and they are not even in the original FER+ order: the last two classes have been
+    # swapped! However, the pretrained model (i.e. VGGFERNet) has been trained on
+    # 8 classes (namely, with include_nf_class=False). Therefore, model predictions
+    # are generated considering the class mapping as ordered below.
+    classes = [
+        "neutral",
+        "happy",
+        "surprise",
+        "sad",
+        "angry",
+        "disgust",
+        "fear",
+        "contempt",
+        "not-human-face",
+        "unknown",
+    ]
+
+    def __init__(
+        self,
+        root: str,
+        split: str = "train",
+        download: bool = False,
+        transform: Optional[Callable[[Any], Any]] = None,
+        include_nf_class: bool = False,
+    ):
+        """Set up the FER+ dataset rooted at ``root``.
+
+        Parameters
+        ----------
+        root, split, download, transform
+            Forwarded unchanged to the parent ``FER`` constructor.
+        include_nf_class
+            Stored as ``self._include_nf``; it selects the name of the on-disk
+            dataset folder and whether "not a face" samples are kept when the
+            partitions are processed.
+        """
+        # Assigned before delegating: the `processed_folder` / `raw_folder`
+        # properties read it (presumably during the parent set-up - TODO confirm).
+        self._include_nf = include_nf_class
+        super().__init__(root, split=split, download=download, transform=transform)
+
+    @property
+    def processed_folder(self):
+        """Path of the ``processed`` folder inside this dataset's directory.
+
+        The dataset directory is named after the class, with a ``_with_nc``
+        suffix when the NF class is included.
+        """
+        suffix = "_with_nc" if self._include_nf else ""
+        dataset_dir = Path(self.root) / f"{self.__class__.__name__}{suffix}"
+        return dataset_dir / "processed"
+
+    @property
+    def raw_folder(self):
+        """Path of the ``raw`` folder inside this dataset's directory.
+
+        The dataset directory is named after the class, with a ``_with_nc``
+        suffix when the NF class is included.
+        """
+        suffix = "_with_nc" if self._include_nf else ""
+        dataset_dir = Path(self.root) / f"{self.__class__.__name__}{suffix}"
+        return dataset_dir / "raw"
+
+    @staticmethod
+    def majority_count(entries: List[Union[str, int]]) -> Any:
+        """Return the position of the vote entry holding a strict majority.
+
+        Parameters
+        ----------
+        entries
+            One row of the FER+ annotations; everything from the third entry
+            onwards is read as the per-label vote counts.
+
+        Returns
+        -------
+        The position (within the vote counts) of the most voted label when it
+        collected more than half of all the votes; otherwise the second-to-last
+        position, i.e. the one of the UNKNOWN emotion.
+        """
+        votes = entries[2:]
+        total, top = sum(votes), max(votes)
+        # No strict majority (ties included): fall back to the UNKNOWN emotion.
+        if 2 * top <= total:
+            return len(votes) - 2
+        return np.argmax(votes)
+
+    def _process_partitions(self):
+        """Build and save the per-partition tensors using the FER+ majority labels.
+
+        Each FER+ annotation row is labelled with its majority vote (see
+        ``majority_count``). For every ``Partition`` member, only the rows with
+        an accepted label are kept, together with the images sharing their row
+        index, and the resulting ``(images, labels)`` tuple is saved in the
+        processed folder.
+        """
+        # data files path
+        raw_data_filepath = self.raw_folder / self.RAW_DATA_FOLDER / self.RAW_DATA_FILE
+        new_labels_data_filepath = (
+            self.raw_folder / self.RAW_DATA_FOLDER / self.NEW_LABELS_DATA_FILE
+        )
+        # data frames
+        raw_df = pd.read_csv(raw_data_filepath)
+        fer_plus_df = pd.read_csv(new_labels_data_filepath, header=0)
+
+        # count majority vote
+        fer_plus_df["majority"] = fer_plus_df.apply(self.majority_count, axis=1)
+
+        # set partitions
+        raw_df["data_partition"] = raw_df.Usage.apply(self._set_partition)
+        fer_plus_df["data_partition"] = fer_plus_df.Usage.apply(self._set_partition)
+
+        for partition in Partition:
+            dataset = raw_df[raw_df["data_partition"] == partition.value]
+            fer_plus_ds = fer_plus_df[fer_plus_df["data_partition"] == partition.value]
+            if not self._include_nf:
+                valid = fer_plus_ds[fer_plus_ds.majority <= 7]  # all but UNKNOWN and NF
+            else:
+                valid = fer_plus_ds[fer_plus_ds.majority != 8]  # all but UNKNOWN
+            # Select the images by the row labels of the kept annotations: per the
+            # module notes, both CSV files list the same rows in the same order.
+            images = dataset.loc[valid.index]
+            images = self._images_as_torch_tensors(images)
+            labels = self._labels_as_torch_tensors(valid)
+            data_file = self.processed_folder / self.data_files[partition]
+            with open(data_file, "wb") as f:
+                th.save((images, labels), f)
+
+    def _labels_as_torch_tensors(self, dataset: pd.DataFrame) -> th.Tensor:
+        """Convert the ``majority`` column of ``dataset`` into a tensor of labels.
+
+        Parameters
+        ----------
+        dataset
+            Data frame holding the integer class indices in a ``majority`` column.
+
+        Returns
+        -------
+        A ``torch.int64`` tensor with one label per row of ``dataset``. When the
+        NF class is included, index 9 is re-mapped to 8.
+        """
+        # `np.int` was deprecated in NumPy 1.20 and removed in NumPy 1.24 (it now
+        # raises AttributeError): use an explicit 64-bit integer type instead.
+        # `astype` returns a copy, so the re-mapping below does not alter `dataset`.
+        labels_np = dataset["majority"].values.astype(np.int64)
+        if self._include_nf:
+            # re-map the original index for NF from 9 to 8
+            labels_np[labels_np == 9] = 8
+        return th.from_numpy(labels_np)