Skip to content

Commit

Permalink
Merge pull request #30 from mhawryluk/@gregori0o/refactor
Browse files Browse the repository at this point in the history
Refactor
  • Loading branch information
gregori0o authored Nov 29, 2022
2 parents 43c07be + 86b467d commit ec0cd7f
Show file tree
Hide file tree
Showing 88 changed files with 3,072 additions and 1,998 deletions.
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Pre-commit hooks for the project: import sorting (isort), code formatting
# (black) and linting (flake8).  Enable with `pre-commit install`.
repos:
  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: isort (python)
        args:
          - "--profile=black"  # keep isort's output compatible with black

  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
        args:
          - --line-length=88
          - --include='\.pyi?$'

  - repo: https://github.com/pycqa/flake8
    rev: 5.0.4
    hooks:
      - id: flake8
        args:
          - "--max-line-length=88"
          - "--max-complexity=18"
          - "--select=B,C,E,F,W,T4,B9,c90"
          # E203/E501/W503 conflict with black; F401/F403 allow re-export imports
          - "--ignore=E203,E266,E501,W503,F403,F401,E402"

default_language_version:
  python: python3.10
37 changes: 19 additions & 18 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,31 +1,32 @@
cycler==0.11.0
dnspython==2.2.1
fonttools==4.32.0
kiwisolver==1.4.2
matplotlib==3.5.1
networkx==2.8.7
numpy==1.22.3
fonttools==4.38.0
kiwisolver==1.4.4
matplotlib==3.6.2
networkx==2.8.8
numpy==1.23.4
packaging==21.3
pandas==1.4.2
Pillow==9.1.0
plotly==5.10.0
psutil==5.9.0
pymongo==4.1.1
pyparsing==3.0.8
PyQt5==5.15.6
pandas==1.5.1
Pillow==9.3.0
plotly==5.11.0
psutil==5.9.4
pymongo==4.3.2
pyparsing==3.0.9
PyQt5==5.15.7
PyQt5-Qt5==5.15.2
PyQt5-sip==12.11.0
PyQt5-stubs==5.15.2.0
PyQt5-stubs==5.15.6.0
PyQtWebEngine==5.15.6
PyQtWebEngine-Qt5==5.15.2
python-dateutil==2.8.2
pytz==2022.1
pytz==2022.6
six==1.16.0
joblib~=1.1.0
joblib==1.2.0
pygraphviz~=1.10
QGraphViz~=0.0.55
pip~=22.0.4
wheel~=0.37.1
setuptools~=60.2.0
pip==22.3.1
wheel==0.38.4
setuptools==65.5.1
graphviz~=0.20.1
scipy==1.9.3
pre-commit
3 changes: 2 additions & 1 deletion src/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .utils import get_samples, check_numeric, get_threads_count
from .algorithm import Algorithm
from .utils import check_numeric, get_samples, get_threads_count
23 changes: 23 additions & 0 deletions src/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from typing import List


class Algorithm(ABC):
    """
    Abstract base class for all algorithms in the application.

    Subclasses must implement :meth:`run` and :meth:`get_steps`.

    Inheriting from ``ABC`` is the fix here: without an ``ABCMeta``
    metaclass the ``@abstractmethod`` decorators are inert, so an
    incomplete subclass could be instantiated silently and only fail
    later with ``NotImplementedError``.  With ``ABC`` the error is
    raised eagerly, at instantiation time.
    """

    @abstractmethod
    def run(self, with_steps: bool):
        """
        Run the algorithm and return the result for AlgorithmResultsWidget.

        If ``with_steps`` is true, the steps of the algorithm's execution
        are recorded for later retrieval via :meth:`get_steps`.
        """
        raise NotImplementedError

    @abstractmethod
    def get_steps(self) -> List:
        """
        Return the list of recorded steps for visualization by
        AlgorithmStepsVisualization.
        """
        raise NotImplementedError
98 changes: 70 additions & 28 deletions src/algorithms/associations/a_priori.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
from itertools import combinations, chain
from typing import List, Tuple, Optional
from enum import Enum
from utils import format_set
from itertools import chain, combinations
from typing import List, Optional, Tuple

import pandas as pd

from algorithms import Algorithm
from utils import format_set


class APriori:
def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, min_confidence: float):
class APriori(Algorithm):
def __init__(
self,
data: pd.DataFrame,
index_column: str,
min_support: float,
min_confidence: float,
):
self.min_support = min_support
self.min_confidence = min_confidence
self.data = data.set_index(index_column)
self.columns = self.data.columns
self.transaction_sets = list(
map(set, self.data.apply(lambda x: x > 0).apply(lambda x: list(self.columns[x.values]), axis=1))
map(
set,
self.data.apply(lambda x: x > 0).apply(
lambda x: list(self.columns[x.values]), axis=1
),
)
)
self.all_frequent_sets = {}
self.k_frequent_sets_df = None
Expand All @@ -22,11 +35,15 @@ def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, mi
def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
frequent_sets = None

for k in range(1, len(self.columns)): # k as in k-item_sets - sets that contain k elements
for k in range(
1, len(self.columns)
): # k as in k-item_sets - sets that contain k elements
generated_item_sets = self._generate_item_sets(with_steps, frequent_sets)
new_frequent_sets = {}
for item_set, item_set_support in zip(generated_item_sets,
map(lambda item_set: self.support(item_set), generated_item_sets)):
for item_set, item_set_support in zip(
generated_item_sets,
map(lambda item_set: self.support(item_set), generated_item_sets),
):
if item_set_support >= self.min_support:
new_frequent_sets[item_set] = item_set_support

Expand All @@ -46,7 +63,11 @@ def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
{
"part": APrioriPartLabel.FILTER_BY_SUPPORT,
"frequent_sets": list(new_frequent_sets.keys()),
"infrequent_sets": [set_ for set_ in generated_item_sets if set_ not in new_frequent_sets],
"infrequent_sets": [
set_
for set_ in generated_item_sets
if set_ not in new_frequent_sets
],
"data_frame": self.k_frequent_sets_df,
}
)
Expand Down Expand Up @@ -77,32 +98,41 @@ def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
}
)

return self._get_frequent_set_pd(self.all_frequent_sets), rules, self.transaction_sets
return (
self._get_frequent_set_pd(self.all_frequent_sets),
rules,
self.transaction_sets,
)

def get_steps(self) -> List[dict]:
    """Return the steps recorded while run() executed with with_steps=True."""
    return self.saved_steps

def _get_frequent_set_pd(self, frequent_sets: dict):
return pd.DataFrame.from_dict({
format_set(frequent_set): round(self.support(frequent_set), 3)
for frequent_set, support
in frequent_sets.items()
},
return pd.DataFrame.from_dict(
{
format_set(frequent_set): round(self.support(frequent_set), 3)
for frequent_set, support in frequent_sets.items()
},
orient="index",
columns=["support"]
columns=["support"],
).sort_values(by="support", ascending=False)

def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tuple]]) -> List[tuple]:
def _generate_item_sets(
self, with_steps: bool, frequent_sets: Optional[List[tuple]]
) -> List[tuple]:
"""
Generates (k+1)-item_sets from k-item_sets
Returns all found sets, not only strong ones
Generates (k+1)-item_sets from k-item_sets
Returns all found sets, not only strong ones
"""
if frequent_sets is None:
return [(item,) for item in self.columns.values]

new_item_sets = []
for frequent_set_1, frequent_set_2 in combinations(frequent_sets, 2):
if not (frequent_set_1[:-1] == frequent_set_2[:-1] and frequent_set_1[-1] < frequent_set_2[-1]):
if not (
frequent_set_1[:-1] == frequent_set_2[:-1]
and frequent_set_1[-1] < frequent_set_2[-1]
):
continue

new_item_set = self.join(frequent_set_1, frequent_set_2)
Expand All @@ -118,7 +148,9 @@ def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tup
}
)

if not self._has_infrequent_subsets(with_steps, new_item_set, frequent_sets):
if not self._has_infrequent_subsets(
with_steps, new_item_set, frequent_sets
):
new_item_sets.append(new_item_set)

return new_item_sets
Expand All @@ -127,7 +159,9 @@ def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tup
def join(frequent_set_1: tuple, frequent_set_2: tuple) -> tuple:
    """
    Join two k-item sets into a (k+1)-item set by appending the last
    element of the second set to the first.

    The caller (_generate_item_sets) only invokes this for pairs that
    share the same (k-1)-prefix and are ordered on their last element,
    so the concatenation yields a valid sorted candidate item set.
    """
    return frequent_set_1 + (frequent_set_2[-1],)

def _has_infrequent_subsets(self, with_steps, new_frequent_set, prev_frequent_sets) -> bool:
def _has_infrequent_subsets(
self, with_steps, new_frequent_set, prev_frequent_sets
) -> bool:
for subset in combinations(new_frequent_set, len(prev_frequent_sets)):
if subset not in prev_frequent_sets:
if with_steps:
Expand All @@ -146,12 +180,16 @@ def support(self, item_set: tuple) -> float:
return count / len(self.transaction_sets)

def confidence(self, item_set_a: tuple, item_set_b: tuple) -> float: # a => b
return self.all_frequent_sets[tuple(sorted(set(item_set_a) | set(item_set_b)))] \
/ self.all_frequent_sets[item_set_a]
return (
self.all_frequent_sets[tuple(sorted(set(item_set_a) | set(item_set_b)))]
/ self.all_frequent_sets[item_set_a]
)

@staticmethod
def get_all_subsets(item_set: tuple):
return chain.from_iterable(combinations(item_set, i) for i in range(len(item_set) + 1))
return chain.from_iterable(
combinations(item_set, i) for i in range(len(item_set) + 1)
)

def _get_association_rules(self, with_steps) -> pd.DataFrame:
rules = {}
Expand All @@ -160,8 +198,12 @@ def _get_association_rules(self, with_steps) -> pd.DataFrame:
subset_b = tuple(set(frequent_set) - set(subset_a))
if not subset_a or not subset_b:
continue
if (confidence := self.confidence(subset_a, subset_b)) >= self.min_confidence:
rules[f"{format_set(subset_a)} => {format_set(subset_b)}"] = round(confidence, 3)
if (
confidence := self.confidence(subset_a, subset_b)
) >= self.min_confidence:
rules[f"{format_set(subset_a)} => {format_set(subset_b)}"] = round(
confidence, 3
)

if with_steps:
self.saved_steps.append(
Expand Down
Loading

0 comments on commit ec0cd7f

Please sign in to comment.