Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor #30

Merged
merged 32 commits into from
Nov 29, 2022
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
7caf527
feat: Create abstract classes and adjust to extra trees and kmeans im…
gregori0o Oct 23, 2022
979da8b
feat: Create config file for Algorithm References.
gregori0o Oct 25, 2022
4ecb517
Merge branch 'develop' into @gregori0o/abstract-class-for-new-algorithms
gregori0o Oct 25, 2022
ef3092f
feat: Implement usage of algorithms_config.py file.
gregori0o Oct 25, 2022
98e7fa8
fix: Adjust algorithms to new classes
gregori0o Oct 26, 2022
4c16710
refactor: Move steps visualization to widgets.steps_widgets
gregori0o Oct 26, 2022
bede648
refactor: Separate canvas from steps widgets and move to visualizatio…
gregori0o Oct 26, 2022
5ddc9d8
fix: Review suggestions.
gregori0o Oct 26, 2022
6011f08
fix: Review suggestion
gregori0o Nov 7, 2022
b60aab6
refactor: Add pre-commit
gregori0o Nov 7, 2022
292a0c3
fix: Problems with imports
gregori0o Nov 8, 2022
f19cff3
refactor: Small changes
gregori0o Nov 8, 2022
08d5b9b
refactor: Remove check_numeric and use select_dtypes (as in GMM)
gregori0o Nov 8, 2022
a6cf914
refactor: Merge clustering canvas
gregori0o Nov 8, 2022
c43f9e0
refactor: Create component for step visualization
gregori0o Nov 9, 2022
f78fdbc
refactor: Use ParametersGroupBox
gregori0o Nov 9, 2022
daa5be8
refactor: Add signals to clustering template
gregori0o Nov 10, 2022
a748883
refactor: Separate ClustersTable class
gregori0o Nov 10, 2022
0af9259
fix: Not change section if linalg error in gmm
gregori0o Nov 10, 2022
611b984
refactor: Use widget to samples data and column choice
gregori0o Nov 10, 2022
2e9fa50
refactor: Remove fig and axes argument from canvas.
gregori0o Nov 11, 2022
712264c
feat: Add scatter plot in preprocessing section.
gregori0o Nov 11, 2022
3405ecf
fix: Fix problems with pre-commit.
gregori0o Nov 11, 2022
d0372a3
upgrade: Upgrade requirements
gregori0o Nov 11, 2022
24f1f54
refactor: All algorithms in config were done.
gregori0o Nov 13, 2022
3ef6e7a
Merge branch 'develop' into @gregori0o/refactor
gregori0o Nov 13, 2022
38e1493
Merge branch 'develop' into @gregori0o/refactor
gregori0o Nov 21, 2022
caa8020
fix: Apply review suggestion.
gregori0o Nov 22, 2022
e311b4a
fix: Apply review suggestion.
gregori0o Nov 22, 2022
443b7a5
fix: Small changes from review.
gregori0o Nov 29, 2022
81d57b5
style: Apply pre-commit
gregori0o Nov 29, 2022
86b467d
Merge branch 'develop' into @gregori0o/refactor
gregori0o Nov 29, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
repos:
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
name: isort (python)
args:
- "--profile=black"

- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
args:
- --line-length=88
- --include='\.pyi?$'

- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
args:
- "--max-line-length=88"
- "--max-complexity=18"
- "--select=B,C,E,F,W,T4,B9,c90"
- "--ignore=E203,E266,E501,W503,F403,F401,E402"

default_language_version:
python: python3.10
37 changes: 19 additions & 18 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,31 +1,32 @@
cycler==0.11.0
dnspython==2.2.1
fonttools==4.32.0
kiwisolver==1.4.2
matplotlib==3.5.1
networkx==2.8.7
numpy==1.22.3
fonttools==4.38.0
kiwisolver==1.4.4
matplotlib==3.6.2
networkx==2.8.8
numpy==1.23.4
packaging==21.3
pandas==1.4.2
Pillow==9.1.0
plotly==5.10.0
psutil==5.9.0
pymongo==4.1.1
pyparsing==3.0.8
PyQt5==5.15.6
pandas==1.5.1
Pillow==9.3.0
plotly==5.11.0
psutil==5.9.4
pymongo==4.3.2
pyparsing==3.0.9
PyQt5==5.15.7
PyQt5-Qt5==5.15.2
PyQt5-sip==12.11.0
PyQt5-stubs==5.15.2.0
PyQt5-stubs==5.15.6.0
PyQtWebEngine==5.15.6
PyQtWebEngine-Qt5==5.15.2
python-dateutil==2.8.2
pytz==2022.1
pytz==2022.6
six==1.16.0
joblib~=1.1.0
joblib==1.2.0
pygraphviz~=1.10
QGraphViz~=0.0.55
pip~=22.0.4
wheel~=0.37.1
setuptools~=60.2.0
pip==22.3.1
wheel==0.38.4
setuptools==65.5.1
graphviz~=0.20.1
scipy==1.9.3
pre-commit
3 changes: 2 additions & 1 deletion src/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .utils import get_samples, check_numeric, get_threads_count
from .algorithm import Algorithm
from .utils import check_numeric, get_samples, get_threads_count
23 changes: 23 additions & 0 deletions src/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from typing import List


class Algorithm:
"""
Abstract class of algorithm
"""

@abstractmethod
def run(self, with_steps: bool):
"""
Run algorithm and return result for class AlgorithmResultsWidget
If with_steps is true, saves steps of algorithm creation
"""
raise NotImplementedError

@abstractmethod
def get_steps(self) -> List:
"""
Return list of steps for visualization by AlgorithmStepsVisualization
"""
raise NotImplementedError
98 changes: 70 additions & 28 deletions src/algorithms/associations/a_priori.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
from itertools import combinations, chain
from typing import List, Tuple, Optional
from enum import Enum
from utils import format_set
from itertools import chain, combinations
from typing import List, Optional, Tuple

import pandas as pd

from algorithms import Algorithm
from utils import format_set


class APriori:
def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, min_confidence: float):
class APriori(Algorithm):
def __init__(
self,
data: pd.DataFrame,
index_column: str,
min_support: float,
min_confidence: float,
):
self.min_support = min_support
self.min_confidence = min_confidence
self.data = data.set_index(index_column)
self.columns = self.data.columns
self.transaction_sets = list(
map(set, self.data.apply(lambda x: x > 0).apply(lambda x: list(self.columns[x.values]), axis=1))
map(
set,
self.data.apply(lambda x: x > 0).apply(
lambda x: list(self.columns[x.values]), axis=1
),
)
)
self.all_frequent_sets = {}
self.k_frequent_sets_df = None
Expand All @@ -22,11 +35,15 @@ def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, mi
def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
frequent_sets = None

for k in range(1, len(self.columns)): # k as in k-item_sets - sets that contain k elements
for k in range(
1, len(self.columns)
): # k as in k-item_sets - sets that contain k elements
generated_item_sets = self._generate_item_sets(with_steps, frequent_sets)
new_frequent_sets = {}
for item_set, item_set_support in zip(generated_item_sets,
map(lambda item_set: self.support(item_set), generated_item_sets)):
for item_set, item_set_support in zip(
generated_item_sets,
map(lambda item_set: self.support(item_set), generated_item_sets),
):
if item_set_support >= self.min_support:
new_frequent_sets[item_set] = item_set_support

Expand All @@ -46,7 +63,11 @@ def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
{
"part": APrioriPartLabel.FILTER_BY_SUPPORT,
"frequent_sets": list(new_frequent_sets.keys()),
"infrequent_sets": [set_ for set_ in generated_item_sets if set_ not in new_frequent_sets],
"infrequent_sets": [
set_
for set_ in generated_item_sets
if set_ not in new_frequent_sets
],
"data_frame": self.k_frequent_sets_df,
}
)
Expand Down Expand Up @@ -77,32 +98,41 @@ def run(self, with_steps) -> Tuple[pd.DataFrame, pd.DataFrame, List[set]]:
}
)

return self._get_frequent_set_pd(self.all_frequent_sets), rules, self.transaction_sets
return (
self._get_frequent_set_pd(self.all_frequent_sets),
rules,
self.transaction_sets,
)

def get_steps(self) -> List[dict]:
return self.saved_steps

def _get_frequent_set_pd(self, frequent_sets: dict):
return pd.DataFrame.from_dict({
format_set(frequent_set): round(self.support(frequent_set), 3)
for frequent_set, support
in frequent_sets.items()
},
return pd.DataFrame.from_dict(
{
format_set(frequent_set): round(self.support(frequent_set), 3)
for frequent_set, support in frequent_sets.items()
},
orient="index",
columns=["support"]
columns=["support"],
).sort_values(by="support", ascending=False)

def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tuple]]) -> List[tuple]:
def _generate_item_sets(
self, with_steps: bool, frequent_sets: Optional[List[tuple]]
) -> List[tuple]:
"""
Generates (k+1)-item_sets from k-item_sets
Returns all found sets, not only strong ones
Generates (k+1)-item_sets from k-item_sets
Returns all found sets, not only strong ones
"""
if frequent_sets is None:
return [(item,) for item in self.columns.values]

new_item_sets = []
for frequent_set_1, frequent_set_2 in combinations(frequent_sets, 2):
if not (frequent_set_1[:-1] == frequent_set_2[:-1] and frequent_set_1[-1] < frequent_set_2[-1]):
if not (
frequent_set_1[:-1] == frequent_set_2[:-1]
and frequent_set_1[-1] < frequent_set_2[-1]
):
continue

new_item_set = self.join(frequent_set_1, frequent_set_2)
Expand All @@ -118,7 +148,9 @@ def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tup
}
)

if not self._has_infrequent_subsets(with_steps, new_item_set, frequent_sets):
if not self._has_infrequent_subsets(
with_steps, new_item_set, frequent_sets
):
new_item_sets.append(new_item_set)

return new_item_sets
Expand All @@ -127,7 +159,9 @@ def _generate_item_sets(self, with_steps: bool, frequent_sets: Optional[List[tup
def join(frequent_set_1: tuple, frequent_set_2: tuple) -> tuple:
return frequent_set_1 + (frequent_set_2[-1],)

def _has_infrequent_subsets(self, with_steps, new_frequent_set, prev_frequent_sets) -> bool:
def _has_infrequent_subsets(
self, with_steps, new_frequent_set, prev_frequent_sets
) -> bool:
for subset in combinations(new_frequent_set, len(prev_frequent_sets)):
if subset not in prev_frequent_sets:
if with_steps:
Expand All @@ -146,12 +180,16 @@ def support(self, item_set: tuple) -> float:
return count / len(self.transaction_sets)

def confidence(self, item_set_a: tuple, item_set_b: tuple) -> float: # a => b
return self.all_frequent_sets[tuple(sorted(set(item_set_a) | set(item_set_b)))] \
/ self.all_frequent_sets[item_set_a]
return (
self.all_frequent_sets[tuple(sorted(set(item_set_a) | set(item_set_b)))]
/ self.all_frequent_sets[item_set_a]
)

@staticmethod
def get_all_subsets(item_set: tuple):
return chain.from_iterable(combinations(item_set, i) for i in range(len(item_set) + 1))
return chain.from_iterable(
combinations(item_set, i) for i in range(len(item_set) + 1)
)

def _get_association_rules(self, with_steps) -> pd.DataFrame:
rules = {}
Expand All @@ -160,8 +198,12 @@ def _get_association_rules(self, with_steps) -> pd.DataFrame:
subset_b = tuple(set(frequent_set) - set(subset_a))
if not subset_a or not subset_b:
continue
if (confidence := self.confidence(subset_a, subset_b)) >= self.min_confidence:
rules[f"{format_set(subset_a)} => {format_set(subset_b)}"] = round(confidence, 3)
if (
confidence := self.confidence(subset_a, subset_b)
) >= self.min_confidence:
rules[f"{format_set(subset_a)} => {format_set(subset_b)}"] = round(
confidence, 3
)

if with_steps:
self.saved_steps.append(
Expand Down
Loading