Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor #30

Merged
merged 32 commits into from
Nov 29, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
7caf527
feat: Create abstract classes and adjust to extra trees and kmeans im…
gregori0o Oct 23, 2022
979da8b
feat: Create config file for Algorithm References.
gregori0o Oct 25, 2022
4ecb517
Merge branch 'develop' into @gregori0o/abstract-class-for-new-algorithms
gregori0o Oct 25, 2022
ef3092f
feat: Implement usage of algorithms_config.py file.
gregori0o Oct 25, 2022
98e7fa8
fix: Adjust algorithms to new classes
gregori0o Oct 26, 2022
4c16710
refactor: Move steps visualization to widgets.steps_widgets
gregori0o Oct 26, 2022
bede648
refactor: Separate canvas from steps widgets and move to visualizatio…
gregori0o Oct 26, 2022
5ddc9d8
fix: Review suggestions.
gregori0o Oct 26, 2022
6011f08
fix: Review suggestion
gregori0o Nov 7, 2022
b60aab6
refactor: Add pre-commit
gregori0o Nov 7, 2022
292a0c3
fix: Problems with imports
gregori0o Nov 8, 2022
f19cff3
refactor: Small changes
gregori0o Nov 8, 2022
08d5b9b
refactor: Remove check_numeric and use select_dtypes (as in GMM)
gregori0o Nov 8, 2022
a6cf914
refactor: Merge clustering canvas
gregori0o Nov 8, 2022
c43f9e0
refactor: Create component for step visualization
gregori0o Nov 9, 2022
f78fdbc
refactor: Use ParametersGroupBox
gregori0o Nov 9, 2022
daa5be8
refactor: Add signals to clustering template
gregori0o Nov 10, 2022
a748883
refactor: Separate ClustersTable class
gregori0o Nov 10, 2022
0af9259
fix: Not change section if linalg error in gmm
gregori0o Nov 10, 2022
611b984
refactor: Use widget to samples data and column choice
gregori0o Nov 10, 2022
2e9fa50
refactor: Remove fig and axes argument from canvas.
gregori0o Nov 11, 2022
712264c
feat: Add scatter plot in preprocessing section.
gregori0o Nov 11, 2022
3405ecf
fix: Fix problems with pre-commit.
gregori0o Nov 11, 2022
d0372a3
upgrade: Upgrade requirements
gregori0o Nov 11, 2022
24f1f54
refactor: All algorithms in config was done.
gregori0o Nov 13, 2022
3ef6e7a
Merge branch 'develop' into @gregori0o/refactor
gregori0o Nov 13, 2022
38e1493
Merge branch 'develop' into @gregori0o/refactor
gregori0o Nov 21, 2022
caa8020
fix: Apply review suggestion.
gregori0o Nov 22, 2022
e311b4a
fix: Apply review suggestion.
gregori0o Nov 22, 2022
443b7a5
fix: Small changes from review.
gregori0o Nov 29, 2022
81d57b5
style: Apply pre-commit
gregori0o Nov 29, 2022
86b467d
Merge branch 'develop' into @gregori0o/refactor
gregori0o Nov 29, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .utils import get_samples, check_numeric, get_threads_count
from .algorithm import Algorithm
23 changes: 23 additions & 0 deletions src/algorithms/algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from abc import ABC, abstractmethod
from typing import List


class Algorithm(ABC):
    """
    Abstract base class of every algorithm.

    The class derives from ABC so that the @abstractmethod markers below are
    actually enforced: instantiating Algorithm itself, or a subclass that does
    not override both methods, raises TypeError.
    """

    @abstractmethod
    def run(self, with_steps: bool):
        """
        Run algorithm and return result for class AlgorithmResultsWidget.
        If with_steps is true, saves steps of algorithm creation.
        """

    @abstractmethod
    def get_steps(self) -> List:
        """
        Return list of steps for visualization by AlgorithmStepsVisualization.
        """
3 changes: 2 additions & 1 deletion src/algorithms/associations/a_priori.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from typing import List, Tuple, Optional
from enum import Enum
from utils import format_set
from algorithms import Algorithm

import pandas as pd


class APriori:
class APriori(Algorithm):
def __init__(self, data: pd.DataFrame, index_column: str, min_support: float, min_confidence: float):
self.min_support = min_support
self.min_confidence = min_confidence
Expand Down
4 changes: 2 additions & 2 deletions src/algorithms/classification/extra_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd
import joblib
from algorithms import check_numeric
from algorithms import check_numeric, Algorithm
from typing import Tuple, List, Callable, Optional, Dict
from algorithms import get_threads_count
from collections import deque
Expand Down Expand Up @@ -303,7 +303,7 @@ def creation_steps(self, get_color: Callable) -> Tuple[Dict, List[str]]:
return creation_info, steps


class ExtraTrees:
class ExtraTrees(Algorithm):

def __init__(self, data: pd.DataFrame, forest_size: int, **tree_parameters):
self.data = data
Expand Down
3 changes: 2 additions & 1 deletion src/algorithms/clustering/gmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from numpy.linalg import LinAlgError
from scipy.stats import multivariate_normal
from math import inf
from algorithms import Algorithm


class GMM:
class GMM(Algorithm):
def __init__(self, df, num_clusters, eps=1e-6, max_iterations=None):
self.df = df.select_dtypes(include=['number'])
self.num_clusters = num_clusters
Expand Down
3 changes: 2 additions & 1 deletion src/algorithms/clustering/k_means.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import pandas as pd
import numpy as np
from typing import List, Tuple, Union, Optional
from algorithms import Algorithm

init_types = ['random', 'kmeans++']


class KMeans:
class KMeans(Algorithm):
def __init__(self, data: pd.DataFrame, num_clusters: int, metrics: int = 1, iterations: Optional[int] = None, repeats: int = 1, init_type: init_types = 'random'):
self.num_clusters = num_clusters
self.metrics = metrics
Expand Down
5 changes: 2 additions & 3 deletions src/app.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import sys
from PyQt5.QtWidgets import QApplication
from engines import ImportDataEngine, PreprocessingEngine, AlgorithmsEngine, ResultsEngine
from state import State
import matplotlib as plt

from engines import AlgorithmsEngine, ImportDataEngine, PreprocessingEngine, ResultsEngine
from widgets import MainWindow


def main():
app = QApplication(sys.argv)
state = State()
algorithm_engine = AlgorithmsEngine(state)
engines = {
Expand All @@ -17,7 +17,6 @@ def main():
'algorithm_run': algorithm_engine,
'results': ResultsEngine(state)
}
app = QApplication(sys.argv)
plt.rcParams.update({"font.size": 7})
window = MainWindow(engines)
sys.exit(app.exec_())
Expand Down
54 changes: 54 additions & 0 deletions src/engines/algorithms_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from typing import Dict, List
from enum import Enum
from dataclasses import dataclass

from algorithms import Algorithm
from algorithms.clustering import KMeans, GMM
from algorithms.associations import APriori
from algorithms.classification import ExtraTrees
from widgets.steps_widgets import AlgorithmStepsVisualization, APrioriStepsVisualization, KMeansStepsVisualization, \
ExtraTreesStepsVisualization, GMMStepsVisualization
from widgets.results_widgets import KMeansResultsWidget, ExtraTreesResultsWidget, AlgorithmResultsWidget, \
APrioriResultsWidget, GMMResultsWidget
from widgets.options_widgets import KMeansOptions, ExtraTreesOptions, AlgorithmOptions, AssociationRulesOptions, \
GMMOptions


class AlgorithmTechniques(Enum):
    """Families of algorithms; each member's value is the technique's string key."""

    CLUSTERING = "clustering"
    ASSOCIATIONS = "associations"
    CLASSIFICATION = "classification"

    @classmethod
    def list(cls) -> List[str]:
        """Return the string value of every member, in declaration order."""
        return [technique.value for technique in cls]


@dataclass
class AlgorithmConfig:
    """
    Group of classes that together describe one entry of ALGORITHMS_INFO.

    Every field holds a class object, not an instance; the engine instantiates them on demand.
    """
    # NOTE(review): `X.__class__` evaluates to the metaclass of X, not "X or a subclass of X";
    # `Type[X]` was probably intended for all four annotations - confirm before changing.

    # Algorithm implementation; the engine calls it with the imported data and the option values as kwargs.
    algorithm: Algorithm.__class__
    # Options widget class; the engine instantiates it without arguments.
    options: AlgorithmOptions.__class__
    # Steps visualization class; the engine calls it with (imported data, steps, is_animation).
    steps_visualization: AlgorithmStepsVisualization.__class__
    # Results widget class; the engine calls it with the raw data, the unpacked result and options=kwargs.
    result_widget: AlgorithmResultsWidget.__class__


# Registry of available algorithms: technique value -> displayed algorithm name -> its classes.
ALGORITHMS_INFO: Dict[str, Dict[str, AlgorithmConfig]] = {
    AlgorithmTechniques.CLUSTERING.value: {
        'K-Means': AlgorithmConfig(
            algorithm=KMeans,
            options=KMeansOptions,
            steps_visualization=KMeansStepsVisualization,
            result_widget=KMeansResultsWidget,
        ),
        'Gaussian Mixture Models': AlgorithmConfig(
            algorithm=GMM,
            options=GMMOptions,
            steps_visualization=GMMStepsVisualization,
            result_widget=GMMResultsWidget,
        ),
    },
    AlgorithmTechniques.ASSOCIATIONS.value: {
        'Apriori': AlgorithmConfig(
            algorithm=APriori,
            options=AssociationRulesOptions,
            steps_visualization=APrioriStepsVisualization,
            result_widget=APrioriResultsWidget,
        ),
    },
    AlgorithmTechniques.CLASSIFICATION.value: {
        'Extra Trees': AlgorithmConfig(
            algorithm=ExtraTrees,
            options=ExtraTreesOptions,
            steps_visualization=ExtraTreesStepsVisualization,
            result_widget=ExtraTreesResultsWidget,
        ),
    },
}
70 changes: 35 additions & 35 deletions src/engines/algorithms_engine.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,22 @@
from algorithms.associations import APriori
from algorithms.classification import ExtraTrees
from algorithms.clustering import KMeans, GMM
from state import State
from visualization.associations import APrioriStepsVisualization
from visualization.clustering import KMeansStepsVisualization, GMMStepsVisualization
from visualization.classification import ExtraTreesStepsVisualization
from widgets.results_widgets import KMeansResultsWidget, ExtraTreesResultsWidget, APrioriResultsWidget, GMMResultsWidget
from .algorithms_config import ALGORITHMS_INFO, AlgorithmTechniques
from typing import List
from widgets.options_widgets import AlgorithmOptions


class AlgorithmsEngine:
def __init__(self, state: State):
self.state = state

self.algorithms_options = {
'clustering': {
'K-Means': (KMeans, KMeansStepsVisualization, KMeansResultsWidget),
'DBSCAN': None,
'Partition Around Medoids': None,
'Gaussian Mixture Models': (GMM, GMMStepsVisualization, GMMResultsWidget),
'Agglomerative clustering': None,
'Divisive clustering': None
},
'associations': {
'A-priori': (APriori, APrioriStepsVisualization, APrioriResultsWidget),
'A-prioriTID': None,
'FP-Growth': None
},
'classification': {
'KNN': None,
'Extra Trees': (ExtraTrees, ExtraTreesStepsVisualization, ExtraTreesResultsWidget),
'SVM': None
}
}
# init options widgets
self.options = {}
for technique, info in ALGORITHMS_INFO.items():
self.options[technique] = {algorithm: classes.options() for algorithm, classes in info.items()}

def run(self, technique, algorithm, will_be_visualized, is_animation, **kwargs):
chosen_alg = self.algorithms_options[technique][algorithm]
if chosen_alg is None:
return
alg = chosen_alg[0](self.state.imported_data, **kwargs)
chosen_alg = ALGORITHMS_INFO[technique][algorithm]

alg = chosen_alg.algorithm(self.state.imported_data, **kwargs)

result = alg.run(will_be_visualized)

Expand All @@ -46,7 +25,8 @@ def run(self, technique, algorithm, will_be_visualized, is_animation, **kwargs):

if will_be_visualized:
steps = alg.get_steps()
self.state.steps_visualization = chosen_alg[1](self.state.imported_data, steps, is_animation)
self.state.steps_visualization = chosen_alg.steps_visualization(self.state.imported_data, steps,
is_animation)
else:
self.state.steps_visualization = None

Expand All @@ -55,13 +35,33 @@ def run(self, technique, algorithm, will_be_visualized, is_animation, **kwargs):
self.state.algorithm_results_widgets[technique] = {}
if not self.state.algorithm_results_widgets[technique].get(algorithm):
self.state.algorithm_results_widgets[technique][algorithm] = []
self.state.algorithm_results_widgets[technique][algorithm].append(chosen_alg[2](self.state.raw_data, *result,
options=kwargs))
self.state.algorithm_results_widgets[technique][algorithm].append(chosen_alg.result_widget(self.state.raw_data,
*result,
options=kwargs))

def get_maximum_clusters(self) -> int:
    """
    Return the upper bound for the number of clusters.

    This is the first dimension (`shape[0]`) of the imported data, or 100 when
    no data has been imported yet.
    """
    data = self.state.imported_data
    return 100 if data is None else data.shape[0]

def get_columns(self) -> list:
def get_columns(self) -> List:
    """Return the column labels of the imported data as a plain list."""
    frame = self.state.imported_data
    return list(frame.columns)

@staticmethod
def get_all_techniques() -> List:
    """Return the string values of all AlgorithmTechniques members."""
    techniques = AlgorithmTechniques.list()
    return techniques

@staticmethod
def get_algorithms_for_techniques(technique: str) -> List[str]:
    """
    Return the names of the algorithms registered in ALGORITHMS_INFO for a technique.

    :param technique: technique key, i.e. one of the values returned by AlgorithmTechniques.list()
    :raises KeyError: if the technique is not present in ALGORITHMS_INFO
    """
    # The previous annotation was the call `AlgorithmTechniques.list()`, which is
    # a list object evaluated at definition time, not a type; the keys are strings.
    return list(ALGORITHMS_INFO[technique])

def get_option_widget(self, technique: str, algorithm: str) -> AlgorithmOptions:
    """
    Return the options widget created in __init__ for the given algorithm.

    :param technique: technique key, i.e. one of the values returned by AlgorithmTechniques.list()
    :param algorithm: algorithm name as used in ALGORITHMS_INFO
    :raises KeyError: if the technique or the algorithm is unknown
    """
    # The previous annotation of `technique` was the call `AlgorithmTechniques.list()`,
    # a list object rather than a type; the key is a plain string.
    return self.options[technique][algorithm]

def update_options(self):
    """
    Push limits derived from the currently imported data into the option widgets.

    The clustering widgets receive the maximum number of clusters (capped at 100);
    the associations and classification widgets receive the list of data columns.
    """
    # self.options is built from ALGORITHMS_INFO, so the names used here must match
    # its keys exactly. The associations entry is registered as 'Apriori'; the
    # former "A-priori" lookup raised KeyError.
    clusters = min(self.get_maximum_clusters(), 100)
    self.options[AlgorithmTechniques.CLUSTERING.value]["K-Means"].set_max_clusters(clusters)
    columns = self.get_columns()
    self.options[AlgorithmTechniques.ASSOCIATIONS.value]["Apriori"].set_columns_options(columns)
    self.options[AlgorithmTechniques.CLASSIFICATION.value]["Extra Trees"].set_values(columns)
    self.options[AlgorithmTechniques.CLUSTERING.value]["Gaussian Mixture Models"].set_max_clusters(clusters)
2 changes: 1 addition & 1 deletion src/engines/preprocessing_engine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
from state import State
from widgets.plots import HistogramPlot, PiePlot, FallbackPlot, NullFrequencyPlot
from visualization.plots import HistogramPlot, PiePlot, FallbackPlot, NullFrequencyPlot
from preprocess import DataCleaner


Expand Down
3 changes: 3 additions & 0 deletions src/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .a_priori_canvas import APrioriGauge, APrioriGraphPlot, APrioriScatterPlot
from .gmm_canvas import GMMCanvas
from .k_means_canvas import KMeansCanvas
2 changes: 0 additions & 2 deletions src/visualization/associations/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion src/visualization/classification/__init__.py

This file was deleted.

2 changes: 0 additions & 2 deletions src/visualization/clustering/__init__.py

This file was deleted.

92 changes: 92 additions & 0 deletions src/visualization/gmm_canvas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import numpy as np
from matplotlib import pyplot as plt, transforms
from matplotlib.backends.backend_qtagg import FigureCanvasQTAgg
from matplotlib.patches import Ellipse


class GMMCanvas(FigureCanvasQTAgg):
    """
    Qt canvas that draws Gaussian-mixture views (raw data, labelled clusters,
    a single cluster, cluster means) on one matplotlib axes.
    """

    def __init__(self, fig, axes, animation):
        # Attributes are set before the base initializer runs, as in the original ordering.
        self.axes = axes
        self.animation = animation
        super().__init__(fig)

    def _frame_axes(self, name_x, name_y, min_x, max_x, min_y, max_y):
        """Set the axis labels and the visible x/y ranges."""
        self.axes.set_xlabel(name_x)
        self.axes.set_ylabel(name_y)
        self.axes.set_xlim(min_x, max_x)
        self.axes.set_ylim(min_y, max_y)

    def data_plot(self, vector_x, vector_y, name_x, name_y, min_x, max_x, min_y, max_y, drawing=True):
        """
        Clear the axes and scatter the unlabelled points.

        Returns the axes collections when the canvas is in animation mode, otherwise None.
        """
        self.axes.cla()
        self._frame_axes(name_x, name_y, min_x, max_x, min_y, max_y)
        self.axes.scatter(x=vector_x, y=vector_y)
        if drawing:
            self.draw()
        return self.axes.collections if self.animation else None

    def _draw_variance(self, mean, sigma, label, max_label, n_std=2.0):
        """
        Add an ellipse outline for a 2x2 covariance `sigma` centred at `mean`.

        The ellipse is built at the origin from the correlation coefficient, then
        rotated by 45 degrees, scaled by n_std standard deviations per axis and
        moved to the mean. Its edge colour is taken from 'gist_rainbow' at
        label / max_label.
        """
        variance_x = sigma[0][0]
        variance_y = sigma[1][1]
        pearson = sigma[0][1] / np.sqrt(variance_x * variance_y)
        width = np.sqrt(1 + pearson) * 2
        height = np.sqrt(1 - pearson) * 2
        edge_color = plt.get_cmap('gist_rainbow')(label / max_label)
        ellipse = Ellipse((0, 0), width=width, height=height, edgecolor=edge_color, facecolor='none')
        placement = (
            transforms.Affine2D()
            .rotate_deg(45)
            .scale(np.sqrt(variance_x) * n_std, np.sqrt(variance_y) * n_std)
            .translate(mean[0], mean[1])
        )
        ellipse.set_transform(placement + self.axes.transData)
        self.axes.add_patch(ellipse)

    def clusters_plot(self, vector_x, vector_y, columns, mean, sigma, labels, max_label, name_x, name_y, min_x, max_x,
                      min_y, max_y, drawing=True):
        """
        Clear the axes, scatter the points coloured by label and, for every cluster,
        draw its mean marker and variance ellipse projected onto (name_x, name_y).

        Returns the axes collections when the canvas is in animation mode, otherwise None.
        """
        self.axes.cla()
        self._frame_axes(name_x, name_y, min_x, max_x, min_y, max_y)
        self.axes.scatter(vector_x, vector_y, c=labels, cmap='gist_rainbow', vmin=0, vmax=max_label)
        # Positions of the two plotted columns inside the full mean vectors / covariance matrices.
        x_index = columns.index(name_x)
        y_index = columns.index(name_y)
        for i in range(len(mean)):
            cluster_mean, cluster_sigma = mean[i], sigma[i]
            mean_2d = [cluster_mean[x_index], cluster_mean[y_index]]
            sigma_2d = [
                [cluster_sigma[x_index][x_index], cluster_sigma[x_index][y_index]],
                [cluster_sigma[y_index][x_index], cluster_sigma[y_index][y_index]],
            ]
            self.axes.scatter(mean_2d[0], mean_2d[1], c=i, cmap='gist_rainbow', marker='s', vmin=0, vmax=max_label)
            self._draw_variance(mean_2d, sigma_2d, i, max_label)

        if drawing:
            self.draw()
        return self.axes.collections if self.animation else None

    def chosen_cluster_plot(self, vector_x, vector_y, mean, sigma, label, max_label, name_x, name_y, min_x, max_x,
                            min_y, max_y, drawing=True):
        """Clear the axes and draw one cluster: its points, its mean marker and a 1-std ellipse."""
        self.axes.cla()
        self._frame_axes(name_x, name_y, min_x, max_x, min_y, max_y)
        point_colors = [label] * len(vector_x)
        self.axes.scatter(vector_x, vector_y, c=point_colors, cmap='gist_rainbow',
                          vmin=0, vmax=max_label, alpha=0.9)
        self.axes.scatter([mean[0]], [mean[1]], c=[label], cmap='gist_rainbow', vmin=0, vmax=max_label,
                          edgecolor='black', linewidths=1, marker='s', alpha=0.7, s=50)
        self._draw_variance(mean, sigma, label, max_label, n_std=1.0)
        if drawing:
            self.draw()

    def clusters_means_plot(self, means, sigmas, name_x, name_y, min_x, max_x, min_y, max_y, drawing=True):
        """Clear the axes and draw only the cluster means, each with a 1-std ellipse."""
        self.axes.cla()
        x_means, y_means = means
        cluster_count = len(x_means)
        self._frame_axes(name_x, name_y, min_x, max_x, min_y, max_y)
        self.axes.scatter(x_means, y_means, c=np.arange(cluster_count),
                          marker='s', cmap='gist_rainbow', vmin=0, vmax=cluster_count, edgecolor='black',
                          linewidths=1)
        for i in range(cluster_count):
            self._draw_variance([x_means[i], y_means[i]], sigmas[i], i, cluster_count, n_std=1.0)
        if drawing:
            self.draw()
Loading