Skip to content

Commit

Permalink
Merge pull request #30 from mhawryluk/@gregori0o/refactor
Browse files Browse the repository at this point in the history
Refactor
  • Loading branch information
gregori0o authored Nov 29, 2022
2 parents 43c07be + 86b467d commit ec0cd7f
Show file tree
Hide file tree
Showing 88 changed files with 3,072 additions and 1,998 deletions.
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Pre-commit hooks for the project: import sorting (isort), code formatting
# (black) and linting (flake8).  Enable with `pre-commit install`.
repos:
  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: isort (python)
        args:
          - "--profile=black"  # keep isort's output compatible with black

  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
        args:
          - --line-length=88
          - --include='\.pyi?$'

  - repo: https://github.com/pycqa/flake8
    rev: 5.0.4
    hooks:
      - id: flake8
        args:
          - "--max-line-length=88"
          - "--max-complexity=18"
          - "--select=B,C,E,F,W,T4,B9,c90"
          # E203/E501/W503 conflict with black; F401/F403 allow re-export imports
          - "--ignore=E203,E266,E501,W503,F403,F401,E402"

default_language_version:
  python: python3.10
37 changes: 19 additions & 18 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,31 +1,32 @@
cycler==0.11.0
dnspython==2.2.1
fonttools==4.32.0
kiwisolver==1.4.2
matplotlib==3.5.1
networkx==2.8.7
numpy==1.22.3
fonttools==4.38.0
kiwisolver==1.4.4
matplotlib==3.6.2
networkx==2.8.8
numpy==1.23.4
packaging==21.3
pandas==1.4.2
Pillow==9.1.0
plotly==5.10.0
psutil==5.9.0
pymongo==4.1.1
pyparsing==3.0.8
PyQt5==5.15.6
pandas==1.5.1
Pillow==9.3.0
plotly==5.11.0
psutil==5.9.4
pymongo==4.3.2
pyparsing==3.0.9
PyQt5==5.15.7
PyQt5-Qt5==5.15.2
PyQt5-sip==12.11.0
PyQt5-stubs==5.15.2.0
PyQt5-stubs==5.15.6.0
PyQtWebEngine==5.15.6
PyQtWebEngine-Qt5==5.15.2
python-dateutil==2.8.2
pytz==2022.1
pytz==2022.6
six==1.16.0
joblib~=1.1.0
joblib==1.2.0
pygraphviz~=1.10
QGraphViz~=0.0.55
pip~=22.0.4
wheel~=0.37.1
setuptools~=60.2.0
pip==22.3.1
wheel==0.38.4
setuptools==65.5.1
graphviz~=0.20.1
scipy==1.9.3
pre-commit
3 changes: 2 additions & 1 deletion src/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .utils import get_samples, check_numeric, get_threads_count
from .algorithm import Algorithm
from .utils import check_numeric, get_samples, get_threads_count
23 changes: 23 additions & 0 deletions src/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from typing import List


class Algorithm(ABC):
    """
    Abstract base class for all algorithms in the application.

    Subclasses must implement :meth:`run` and :meth:`get_steps`.

    Inheriting from ``ABC`` is the fix here: without an ``ABCMeta``
    metaclass the ``@abstractmethod`` decorators are inert, so an
    incomplete subclass could be instantiated silently and only fail
    later with ``NotImplementedError``.  With ``ABC`` the error is
    raised eagerly, at instantiation time.
    """

    @abstractmethod
    def run(self, with_steps: bool):
        """
        Run the algorithm and return the result for AlgorithmResultsWidget.

        If ``with_steps`` is true, the steps of the algorithm's execution
        are recorded for later retrieval via :meth:`get_steps`.
        """
        raise NotImplementedError

    @abstractmethod
    def get_steps(self) -> List:
        """
        Return the list of recorded steps for visualization by
        AlgorithmStepsVisualization.
        """
        raise NotImplementedError
98 changes: 70 additions & 28 deletions src/algorithms/associations/a_priori.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
from itertools import combinations, chain
from typing import List, Tuple, Optional
from enum import Enum
from utils import format_set
from itertools import chain, combinations
from typing import List, Optional, Tuple

import pandas as pd

from algorithms import Algorithm
from utils import format_set


class APriori:
def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, min_confidence: float):
class APriori(Algorithm):
def __init__(
self,
data: pd.DataFrame,
index_column: str,
min_support: float,
min_confidence: float,
):
self.min_support = min_support
self.min_confidence = min_confidence
self.data = data.set_index(index_column)
self.columns = self.data.columns
self.transaction_sets = list(
map(set, self.data.apply(lambda x: x > 0).apply(lambda x: list(self.columns[x.values]), axis=1))
map(
set,
self.data.apply(lambda x: x > 0).apply(
lambda x: list(self.columns[x.values]), axis=1
),
)
)
self.all_frequent_sets = {}
self.k_frequent_sets_df = None
Expand All @@ -22,11 +35,15 @@ def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, mi
def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
frequent_sets = None

for k in range(1, len(self.columns)): # k as in k-item_sets - sets that contain k elements
for k in range(
1, len(self.columns)
): # k as in k-item_sets - sets that contain k elements
generated_item_sets = self._generate_item_sets(with_steps, frequent_sets)
new_frequent_sets = {}
for item_set, item_set_support in zip(generated_item_sets,
map(lambda item_set: self.support(item_set), generated_item_sets)):
for item_set, item_set_support in zip(
generated_item_sets,
map(lambda item_set: self.support(item_set), generated_item_sets),
):
if item_set_support >= self.min_support:
new_frequent_sets[item_set] = item_set_support

Expand All @@ -46,7 +63,11 @@ def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
{
"part": APrioriPartLabel.FILTER_BY_SUPPORT,
"frequent_sets": list(new_frequent_sets.keys()),
"infrequent_sets": [set_ for set_ in generated_item_sets if set_ not in new_frequent_sets],
"infrequent_sets": [
set_
for set_ in generated_item_sets
if set_ not in new_frequent_sets
],
"data_frame": self.k_frequent_sets_df,
}
)
Expand Down Expand Up @@ -77,32 +98,41 @@ def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
}
)

return self._get_frequent_set_pd(self.all_frequent_sets), rules, self.transaction_sets
return (
self._get_frequent_set_pd(self.all_frequent_sets),
rules,
self.transaction_sets,
)

def get_steps(self) -> List[dict]:
    """Return the steps recorded while run() executed with with_steps=True."""
    return self.saved_steps

def _get_frequent_set_pd(self, frequent_sets: dict):
return pd.DataFrame.from_dict({
format_set(frequent_set): round(self.support(frequent_set), 3)
for frequent_set, support
in frequent_sets.items()
},
return pd.DataFrame.from_dict(
{
format_set(frequent_set): round(self.support(frequent_set), 3)
for frequent_set, support in frequent_sets.items()
},
orient="index",
columns=["support"]
columns=["support"],
).sort_values(by="support", ascending=False)

def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tuple]]) -> List[tuple]:
def _generate_item_sets(
self, with_steps: bool, frequent_sets: Optional[List[tuple]]
) -> List[tuple]:
"""
Generates (k+1)-item_sets from k-item_sets
Returns all found sets, not only strong ones
Generates (k+1)-item_sets from k-item_sets
Returns all found sets, not only strong ones
"""
if frequent_sets is None:
return [(item,) for item in self.columns.values]

new_item_sets = []
for frequent_set_1, frequent_set_2 in combinations(frequent_sets, 2):
if not (frequent_set_1[:-1] == frequent_set_2[:-1] and frequent_set_1[-1] < frequent_set_2[-1]):
if not (
frequent_set_1[:-1] == frequent_set_2[:-1]
and frequent_set_1[-1] < frequent_set_2[-1]
):
continue

new_item_set = self.join(frequent_set_1, frequent_set_2)
Expand All @@ -118,7 +148,9 @@ def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tup
}
)

if not self._has_infrequent_subsets(with_steps, new_item_set, frequent_sets):
if not self._has_infrequent_subsets(
with_steps, new_item_set, frequent_sets
):
new_item_sets.append(new_item_set)

return new_item_sets
Expand All @@ -127,7 +159,9 @@ def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tup
def join(frequent_set_1: tuple, frequent_set_2: tuple) -> tuple:
    """
    Join two k-item sets into a (k+1)-item set by appending the last
    element of the second set to the first.

    The caller (_generate_item_sets) only invokes this for pairs that
    share the same (k-1)-prefix and are ordered on their last element,
    so the concatenation yields a valid sorted candidate item set.
    """
    return frequent_set_1 + (frequent_set_2[-1],)

def _has_infrequent_subsets(self, with_steps, new_frequent_set, prev_frequent_sets) -> bool:
def _has_infrequent_subsets(
self, with_steps, new_frequent_set, prev_frequent_sets
) -> bool:
for subset in combinations(new_frequent_set, len(prev_frequent_sets)):
if subset not in prev_frequent_sets:
if with_steps:
Expand All @@ -146,12 +180,16 @@ def support(self, item_set: tuple) -> float:
return count / len(self.transaction_sets)

def confidence(self, item_set_a: tuple, item_set_b: tuple) -> float: # a => b
return self.all_frequent_sets[tuple(sorted(set(item_set_a) | set(item_set_b)))] \
/ self.all_frequent_sets[item_set_a]
return (
self.all_frequent_sets[tuple(sorted(set(item_set_a) | set(item_set_b)))]
/ self.all_frequent_sets[item_set_a]
)

@staticmethod
def get_all_subsets(item_set: tuple):
return chain.from_iterable(combinations(item_set, i) for i in range(len(item_set) + 1))
return chain.from_iterable(
combinations(item_set, i) for i in range(len(item_set) + 1)
)

def _get_association_rules(self, with_steps) -> pd.DataFrame:
rules = {}
Expand All @@ -160,8 +198,12 @@ def _get_association_rules(self, with_steps) -> pd.DataFrame:
subset_b = tuple(set(frequent_set) - set(subset_a))
if not subset_a or not subset_b:
continue
if (confidence := self.confidence(subset_a, subset_b)) >= self.min_confidence:
rules[f"{format_set(subset_a)} => {format_set(subset_b)}"] = round(confidence, 3)
if (
confidence := self.confidence(subset_a, subset_b)
) >= self.min_confidence:
rules[f"{format_set(subset_a)} => {format_set(subset_b)}"] = round(
confidence, 3
)

if with_steps:
self.saved_steps.append(
Expand Down
Loading

0 comments on commit ec0cd7f

Please sign in to comment.