diff --git a/apps/dash-svm-2022/LICENSE b/apps/dash-svm-2022/LICENSE new file mode 100644 index 000000000..eb7020076 --- /dev/null +++ b/apps/dash-svm-2022/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 T.ii + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/apps/dash-svm-2022/Procfile b/apps/dash-svm-2022/Procfile new file mode 100644 index 000000000..b308fd89d --- /dev/null +++ b/apps/dash-svm-2022/Procfile @@ -0,0 +1 @@ +web: gunicorn app:server \ No newline at end of file diff --git a/apps/dash-svm-2022/README.md b/apps/dash-svm-2022/README.md new file mode 100644 index 000000000..1c1f1236c --- /dev/null +++ b/apps/dash-svm-2022/README.md @@ -0,0 +1,10 @@ +# About the app + +This is a 2022 replica of [dash-svm](https://dash.gallery/dash-svm/). I used dash's recent new components and new features and had a great experience. + +![alt text](assets/screenshot.png "Screenshot") + +You can see it [here](https://dash-svm-2022.herokuapp.com/). 
There's also a [gif](assets/demo.gif). + +It already contains the necessary profiles needed to deploy on Heroku and you can easily run it. + diff --git a/apps/dash-svm-2022/app.py b/apps/dash-svm-2022/app.py new file mode 100644 index 000000000..5cbbfb222 --- /dev/null +++ b/apps/dash-svm-2022/app.py @@ -0,0 +1,586 @@ +# -*- utf-8 -*- + +import dash_bootstrap_components as dbc +from dash import ( + Input, + Output, + State, + html, + Dash, + dcc, + dash_table, + get_asset_url, + ALL, + MATCH, + ClientsideFunction, + callback_context, + no_update, +) +from dash.exceptions import PreventUpdate + +# ========================================== + +# ========================================== +import time +import numpy as np +import pandas as pd + +# ========================================== +from utils.sampling import sampling, df_split, data_split +from utils.modeling import modeling +from utils.charting import prediction_plot, confusion_matrix_plot, roc_curve_plot + +# ========================================== +from utils.handle_func import * +from utils.split_components import * + +# ========================================== + +# ========================================== + +# ========================================== + +# ========================================== + +# ========================================== + +# ========================================== + +meta_tags = [ # Please modify before deploying. + dict( + name="description", + content="This is a 2022 replica of dash-svm. I used dash's recent new components and new features, and I've added new page elements.", + ), + dict(property="og:url", content="https://plotly.com/"), + dict(property="og:type", content="website"), + dict(property="og:title", content="Support Vector Machine Explorer"), + dict( + property="og:description", + content="This is a 2022 replica of dash-svm. 
I used dash's recent new components and new features, and I've added new page elements.", + ), + dict( + property="og:image", + content="https://plotly-marketing-website.cdn.prismic.io/plotly-marketing-website/948b6663-9429-4bd6-a4cc-cb33231d4532_logo-plotly.svg", + ), + dict(name="twitter:card", content="summary_large_image"), + dict(property="twitter:domain", content="www.plotly.com/"), + dict(property="twitter:url", content="https://plotly.com/"), + dict(name="twitter:title", content="Support Vector Machines"), + dict( + name="twitter:description", + content="This is a 2022 replica of dash-svm. I used dash's recent new components and new features, and I've added new page elements.", + ), + dict( + name="twitter:image", + content="https://plotly-marketing-website.cdn.prismic.io/plotly-marketing-website/948b6663-9429-4bd6-a4cc-cb33231d4532_logo-plotly.svg", + ), +] +# ========================================== + +app = Dash( + __name__, + title="Support Vector Machine Explorer", + update_title="Eating...", + # external_scripts=external_scripts, + meta_tags=meta_tags, + external_stylesheets=[dbc.themes.FLATLY, dbc.icons.BOOTSTRAP], +) + +server = app.server # for deployment + +app.config.suppress_callback_exceptions = True +# ========================================== +# ========================================== + +# =============layout======================= + +app.layout = html.Div( + [ + dbc.Navbar( + dbc.Container( + [ + html.A( + dbc.Row( + [ + dbc.Col( + html.Img( + src="https://plotly-marketing-website.cdn.prismic.io/plotly-marketing-website/948b6663-9429-4bd6-a4cc-cb33231d4532_logo-plotly.svg", + height="30px", + ) + ), + dbc.Col( + dbc.NavbarBrand( + "Support Vector Machine Explorer", + className="ms-2", + ) + ), + ], + align="center", + className="g-0", + ), + href="/", + style={"textDecoration": "none"}, + ), + ] + ) + ), + dbc.Row( + [ + dbc.Col( + dbc.Container( + [ + html.Br(), + dbc.Row( + dbc.Col( + fig_0 := dcc.Graph( + 
config=dict(displayModeBar="hover") + ) + ) + ), + html.Br(), + dbc.Row( + [ + dbc.Col( + fig_1 := dcc.Graph( + config=dict(displayModeBar=False) + ), + width=6, + align="center", + ), + dbc.Col( + fig_2 := dcc.Graph( + config=dict(displayModeBar=False) + ), + width=6, + align="center", + ), + ] + ), + html.Br(), + dbc.Row( + alert := dbc.Alert( + is_open=False, + dismissable=True, + duration=2000, + className="d-flex align-items-center", + style={ + "padding-left": "45px", + "margin-top": "55px", + }, + ) + ), + ], + ), + width=8, + ), + dbc.Col( + dbc.Container( + [ + html.Br(), + uploader_btn, + html.Br(), + threshold, + html.Br(), + kernel, + html.Br(), + formula, + html.Br(), + cost, + html.Br(), + degree, + html.Br(), + gamma, + html.Br(), + html.Br(), + shrinking, + html.Br(), + ] + ), + width=4, + ), + ] + ), + dbc.Row( + [ + svm_params := html.Div(style={"display": "none"}), + # It seems like the current version of dcc.Store doesn't support assignment expressions. + dcc.Store( + id="datasets_params", + storage_type="memory", + data=[ + [ + {"type": "dataset_parameter", "index": "dataset"}, + {"type": "dataset_parameter", "index": "sample_size"}, + {"type": "dataset_parameter", "index": "noise"}, + {"type": "dataset_parameter", "index": "test_size"}, + ], + ["LS", 100, 0.4, 0.25], + [], + [], + [], + [], + [], + [], + "tab-0", + ], + ), + ] + ), + ] +) + +# ========================================== + +# ================callbacks================= +app.callback( + [ + Output({"type": "uploader_parameter", "index": "x"}, "options"), + Output({"type": "uploader_parameter", "index": "y"}, "options"), + Output({"type": "uploader_parameter", "index": "c"}, "options"), + ], + [Input({"type": "uploader_parameter", "index": "uploader"}, "contents")], + [State({"type": "uploader_parameter", "index": "uploader"}, "filename")], + prevent_initial_call=True, +)( + lambda data, filename: 3 * [parse_contents(data, filename, header=True)] + if data + else 3 * [no_update] +) 
def _split_to_records(split_data):
    """Turn a train/test split into row records for a preview table.

    Args:
        split_data: Four-item sequence ``(X_train, X_test, c_train, c_test)``;
            the point arrays must have exactly two columns.

    Returns:
        list[dict]: One record per sample with the keys ``x``, ``y``, ``c``
        (class label) and ``s`` (``"TRAIN"`` or ``"TEST"``), training rows
        first.
    """
    points_train, points_test, labels_train, labels_test = split_data
    frames = []
    for points, labels, subset in (
        (points_train, labels_train, "TRAIN"),
        (points_test, labels_test, "TEST"),
    ):
        frame = pd.DataFrame(points, columns=["x", "y"])
        frame["c"] = labels
        frame["s"] = subset
        frames.append(frame)
    return pd.concat(frames).to_dict("records")


@app.callback(
    Output({"type": "tabs-table", "id": "t2"}, "data"),
    [Input({"type": "uploader_parameter", "index": ALL}, "value")],
    [
        State({"type": "uploader_parameter", "index": ALL}, "id"),
        State({"type": "uploader_parameter", "index": "uploader"}, "contents"),
        State({"type": "uploader_parameter", "index": "uploader"}, "filename"),
    ],
)
def update_output(xyc, idx, uploaded_data, filename):
    """Preview the uploaded file as a train/test table (tab "t2").

    Args:
        xyc: ``value`` of every ``uploader_parameter`` component, in the same
            order as ``idx``.
        idx: Pattern-matching ids of those components; each ``index`` names
            the role of the value (``x``, ``y``, ``c``, ``test_size``,
            ``uploader``).
        uploaded_data: ``contents`` of the upload component.
        filename: Name of the uploaded file.

    Returns:
        list[dict]: Records for the preview table.

    Raises:
        PreventUpdate: If nothing is uploaded yet or any of the selectable
            values is still empty.
    """
    data_params = {
        component["index"]: xyc[pos] for pos, component in enumerate(idx)
    }

    # The upload component itself carries no "value", so it is ignored when
    # checking that every selectable parameter has been filled in.
    selections_complete = all(
        value for role, value in data_params.items() if role != "uploader"
    )
    if not selections_complete or not uploaded_data:
        raise PreventUpdate

    # Maps the user-chosen source column to the role it plays (x / y / c).
    column_roles = {
        column: role
        for role, column in data_params.items()
        if role not in ("uploader", "test_size")
    }

    df0 = parse_contents(
        uploaded_data,
        filename,
        header=False,
        usecols=list(column_roles),
    )
    # Non-mutating rename with a real mapping; the previous ``inplace=1``
    # passed an int where pandas expects a bool flag.
    df0 = df0.rename(columns=column_roles)

    data = df_split(**data_params, df=df0)
    return _split_to_records(data[0])


@app.callback(
    Output({"type": "tabs-table", "id": "t3"}, "data"),
    [
        Input({"type": "canvas_parameter", "index": "test_size"}, "value"),
        Input({"type": "canvas_parameter", "index": "canvas"}, "json_data"),
    ],
)
def canvas_output(test_size, canvas_data):
    """Preview the hand-drawn points as a train/test table (tab "t3").

    Args:
        test_size: Value of the canvas tab's test-size slider.
        canvas_data: JSON string emitted by the canvas component.

    Returns:
        list[dict]: Records for the preview table.

    Raises:
        PreventUpdate: If the canvas has not produced any data yet.
    """
    if not canvas_data:
        raise PreventUpdate

    X, y = handle_json(canvas_data)
    data = data_split(test_size=test_size, X=X, y=y)
    return _split_to_records(data[0])
@app.callback(
    [
        Output(fig_0, "figure"),
        Output(fig_1, "figure"),
        Output(fig_2, "figure"),
        Output(alert, "children"),
        Output(alert, "is_open"),
    ],
    [
        Input(save_btn, "n_clicks"),
        Input({"type": "svm_parameter", "index": ALL}, "value"),
    ],
    [
        State({"type": "svm_parameter", "index": ALL}, "id"),
        State({"type": "dataset_parameter", "index": ALL}, "id"),
        State({"type": "dataset_parameter", "index": ALL}, "value"),
        State({"type": "uploader_parameter", "index": ALL}, "id"),
        State({"type": "uploader_parameter", "index": ALL}, "value"),
        State({"type": "uploader_parameter", "index": ALL}, "contents"),
        State({"type": "uploader_parameter", "index": ALL}, "filename"),
        State({"type": "canvas_parameter", "index": ALL}, "json_data"),
        State({"type": "canvas_parameter", "index": ALL}, "value"),
        State(tabs, "active_tab"),
        State("datasets_params", "data"),
    ],
)
def params_update(
    n_clicks,
    value,
    idx,
    tab_1_idx,
    tab_1_values,
    tab_2_idx,
    tab_2_values,
    uploaded_data,
    filename,
    canvas_data,
    tab_3_params,
    at,
    tabs_cache,
):
    """Fit the SVM on the selected data source and redraw the three figures.

    Args:
        n_clicks: Click count of the SAVE button (only used as a trigger).
        value: Values of every ``svm_parameter`` component.
        idx: Ids of those components, aligned with ``value``.
        tab_1_idx, tab_1_values: Ids / values of the ``dataset_parameter``
            components.
        tab_2_idx, tab_2_values: Ids / values of the ``uploader_parameter``
            components.
        uploaded_data, filename: ``contents`` / ``filename`` of every
            ``uploader_parameter`` component (``None`` for those without one).
        canvas_data: ``json_data`` of every ``canvas_parameter`` component.
        tab_3_params: ``value`` of every ``canvas_parameter`` component.
        at: Id of the active tab (``"tab-0"``, ``"tab-1"`` or ``"tab-2"``).
        tabs_cache: Content of the ``datasets_params`` store: the nine
            data-source values above, in the same order.

    Returns:
        tuple: prediction figure, ROC figure, confusion-matrix figure, the
        alert children and ``True`` to open the alert.

    Raises:
        PreventUpdate: If the active tab is unknown, or the selected data
            source has no upload / drawing / test size to work with.
    """
    started = time.perf_counter()

    # When an SVM control fired (anything but the SAVE button), the live tab
    # states are replaced by the snapshot held in the ``datasets_params``
    # store.
    trigger_id = callback_context.triggered[0]["prop_id"].split(".")[0]
    if trigger_id != save_btn.id:
        (
            tab_1_idx,
            tab_1_values,
            tab_2_idx,
            tab_2_values,
            uploaded_data,
            filename,
            canvas_data,
            tab_3_params,
            at,
        ) = tabs_cache

    if at == "tab-0":
        data_1_params = {
            component["index"]: tab_1_values[pos]
            for pos, component in enumerate(tab_1_idx)
        }
        data = sampling(**data_1_params)

    elif at == "tab-1":
        data_2_params = {
            component["index"]: tab_2_values[pos]
            for pos, component in enumerate(tab_2_idx)
        }

        # The states are collected with an ALL wildcard (workaround from the
        # original author for components inside dbc.Tab not being found), so
        # the single real upload has to be picked out of a list of Nones.
        contents = next(filter(None, uploaded_data), None)
        upload_name = next(filter(None, filename), None)
        if contents is None or upload_name is None:
            # SAVE pressed without an upload: nothing to fit.
            raise PreventUpdate

        column_roles = {
            column: role
            for role, column in data_2_params.items()
            if role not in ("uploader", "test_size")
        }
        df0 = parse_contents(
            contents,
            upload_name,
            header=False,
            usecols=list(column_roles),
        )
        # Non-mutating rename; the former ``inplace=1`` passed an int flag.
        df0 = df0.rename(columns=column_roles)

        data_2_params["df"] = df0
        data = df_split(**data_2_params)

    elif at == "tab-2":
        drawing = next(filter(None, canvas_data), None)
        split_ratio = next(filter(None, tab_3_params), None)
        if drawing is None or split_ratio is None:
            # Nothing drawn yet, or no usable test size.
            raise PreventUpdate

        X, y = handle_json(drawing)
        data = data_split(test_size=split_ratio, X=X, y=y)

    else:
        # Previously fell through to ``if not data`` with ``data`` unbound,
        # which raised UnboundLocalError instead of skipping the update.
        raise PreventUpdate

    params = {component["index"]: value[pos] for pos, component in enumerate(idx)}
    params["data"] = data
    # Cost and gamma are entered as coefficient x 10**power.
    params["cost"] = 10 ** params["cost_power"] * params["cost_coef"]
    params["gamma"] = 10 ** params["gamma_power"] * params["gamma_coef"]
    params["model"] = modeling(**params)

    # Distinct local names: ``fig_0`` / ``fig_1`` / ``fig_2`` are the
    # module-level graph components used in the decorator above.
    prediction_fig = prediction_plot(**params)
    roc_fig = roc_curve_plot(**params)
    matrix_fig = confusion_matrix_plot(**params)

    elapsed = time.perf_counter() - started

    alert_info = [
        html.I(className="bi bi-check-circle-fill me-2"),
        "It took {:.3} seconds".format(elapsed),
    ]

    return prediction_fig, roc_fig, matrix_fig, alert_info, True
"n_clicks"), Input(save_btn, "n_clicks")], + [State(offcanvas, "is_open")], +) + +app.clientside_callback( + ClientsideFunction(namespace="clientside", function_name="btn_disabled"), + Output(save_btn, "disabled"), + [Input({"type": "tabs-table", "id": ALL}, "is_loading")], +) + +app.clientside_callback( + ClientsideFunction(namespace="clientside", function_name="disable_param_degree"), + Output({"type": "svm_parameter", "index": "degree"}, "disabled"), + [Input({"type": "svm_parameter", "index": "kernel"}, "value")], +) + +app.clientside_callback( + ClientsideFunction(namespace="clientside", function_name="kernel_formula"), + Output(latex_formula, "children"), + [Input({"type": "svm_parameter", "index": "kernel"}, "value")], +) + +app.clientside_callback( + ClientsideFunction(namespace="clientside", function_name="disable_param_gamma"), + [ + Output({"type": "svm_parameter", "index": "gamma_power"}, "disabled"), + Output({"type": "svm_parameter", "index": "gamma_coef"}, "disabled"), + ], + [Input({"type": "svm_parameter", "index": "kernel"}, "value")], +) + +app.clientside_callback( + ClientsideFunction(namespace="clientside", function_name="scale_param"), + Output({"type": "svm_parameter", "index": "cost_coef"}, "marks"), + [Input({"type": "svm_parameter", "index": "cost_power"}, "value")], +) + +app.clientside_callback( + ClientsideFunction(namespace="clientside", function_name="scale_param"), + Output({"type": "svm_parameter", "index": "gamma_coef"}, "marks"), + Input({"type": "svm_parameter", "index": "gamma_power"}, "value"), +) + +# ========================================== +# ========================================== +# ========================================== +# ========================================== + +if __name__ == "__main__": + app.run_server(debug=True) diff --git a/apps/dash-svm-2022/assets/canvas_bg.png b/apps/dash-svm-2022/assets/canvas_bg.png new file mode 100644 index 000000000..69c8e3183 Binary files /dev/null and 
// Clientside callbacks for the SVM explorer, registered under the
// "clientside" namespace that app.py references via ClientsideFunction.
window.dash_clientside = Object.assign({}, window.dash_clientside, {
    clientside: {
        // Flip the canvas pen between the two class colours.
        canvas_toggle: (n, c) => (c === '#509188' ? '#FF7070' : '#509188'),

        // First argument is the SAVE click count; the remaining arguments are
        // the tab states, stored as one array once SAVE has been clicked.
        datasets_params_store: (n_clicks, ...states) =>
            n_clicks > 0 ? states : window.dash_clientside.no_update,

        // Toggle the off-canvas panel once either button has been clicked.
        open_offcanvas: (n, sn, is_open) => ((n || sn) ? !is_open : is_open),

        // SAVE is disabled while any preview table is still loading.
        btn_disabled: tabs_table => tabs_table.some(Boolean),

        // Degree only applies to the polynomial kernel.
        disable_param_degree: kernel => kernel !== 'poly',

        // Knob position (0..1) at which the scaled threshold equals a
        // decision value of 0, derived from the contour trace's z grid.
        reset_threshold: (n_clicks, fig) => {
            if (!n_clicks || !fig || !fig.data || !fig.data.length) {
                return 0.5;
            }
            const values = fig.data[0].z.flat(Infinity);
            // Plain loop instead of Math.min(...values): spreading a whole
            // grid into call arguments can exceed the engine's argument limit.
            let lo = Infinity;
            let hi = -Infinity;
            for (const v of values) {
                if (v < lo) lo = v;
                if (v > hi) hi = v;
            }
            // Empty or constant grid: avoid returning NaN / Infinity.
            if (!(hi > lo)) {
                return 0.5;
            }
            return -lo / (hi - lo);
        },

        kernel_formula: kernel => ({
            'rbf': '$K(x, z) = exp(-\\gamma||x-z||^2)$',
            'linear': '$K(x, z) = x \\bullet z$',
            'poly': '$K(x,z) = (\\gamma x \\bullet z+r)^d$',
            'sigmoid': '$K(x,z) = tanh(\\gamma x \\bullet z+r)$'
        }[kernel]),

        // Gamma is used by the rbf, poly and sigmoid kernels, so its two
        // controls must be disabled for every OTHER kernel. The previous
        // version had the condition inverted.
        disable_param_gamma: kernel => {
            const disabled = !['rbf', 'poly', 'sigmoid'].includes(kernel);
            return [disabled, disabled];
        },

        // Slider marks 1, 3, 5, 7, 9 scaled by 10**power.
        scale_param: power => {
            const labels = {};
            // ``const`` keeps the loop variable local; it used to leak as an
            // implicit global.
            for (const i of Array.from(Array(5).keys(), n => 2 * n + 1)) {
                labels[i] = power < 0
                    ? (i / 10 ** -power).toString()
                    : (i * 10 ** power).toString();
            }
            return labels;
        }
    }
});
def prediction_plot(**kwargs):
    """Build the decision-surface figure with train and test points.

    Keyword Args:
        data: Sequence whose first item is the split
            ``(X_train, X_test, y_train, y_test)``.
        model: Sequence holding, by position, the fitted classifier
            (must offer ``decision_function``), the flat array of decision
            values on the mesh, and the ``xx`` / ``yy`` mesh grids.
        threshold: Knob value that is rescaled onto the range of the mesh
            decision values.

    Returns:
        plotly.graph_objs.Figure: Filled contour, threshold line, and the
        training / test scatter traces (accuracies shown in the legend).
    """
    split_data = kwargs['data'][0]
    model = kwargs['model']
    classifier, grid_scores, xx, yy = model[0], model[1], model[2], model[3]

    z_min, z_max = grid_scores.min(), grid_scores.max()
    scaled_threshold = kwargs['threshold'] * (z_max - z_min) + z_min

    # Classify against the SAME scaled threshold that the contour line and the
    # confusion matrix use; the raw knob value was previously compared with
    # decision values directly, so the legend accuracies did not match the
    # drawn boundary.
    y_pred_train = (classifier.decision_function(split_data[0]) >
                    scaled_threshold).astype(int)
    y_pred_test = (classifier.decision_function(split_data[1]) >
                   scaled_threshold).astype(int)
    train_score = accuracy_score(y_true=split_data[2].astype(int),
                                 y_pred=y_pred_train)
    test_score = accuracy_score(y_true=split_data[3].astype(int),
                                y_pred=y_pred_test)

    # Half-width of a colour range centred on the threshold (formerly named
    # ``range``, which shadowed the builtin).
    z_span = max(abs(scaled_threshold - z_min), abs(scaled_threshold - z_max))

    # Take the axes straight from the mesh so they always match z's shape;
    # rebuilding them with np.arange(min, max, step) can drop the last sample.
    x_axis = xx[0]
    y_axis = yy[:, 0]
    z_grid = grid_scores.reshape(xx.shape)

    trace0 = go.Contour(x=x_axis,
                        y=y_axis,
                        z=z_grid,
                        zmin=scaled_threshold - z_span,
                        zmax=scaled_threshold + z_span,
                        hoverinfo='none',
                        showscale=False,
                        contours=dict(showlines=False),
                        colorscale='rdgy',
                        opacity=0.6)

    trace1 = go.Contour(x=x_axis,
                        y=y_axis,
                        z=z_grid,
                        showscale=False,
                        hoverinfo='none',
                        contours=dict(
                            showlines=False,
                            type='constraint',
                            operation='=',
                            value=scaled_threshold,
                        ),
                        name=f'Threshold ({scaled_threshold:.3f})',
                        line=dict(color='#454545'))

    trace2 = go.Scatter(x=split_data[0][:, 0],
                        y=split_data[0][:, 1],
                        mode='markers',
                        name=f'Training Data (accuracy={train_score:.3f})',
                        marker=dict(size=10,
                                    color=split_data[2].astype(int),
                                    colorscale='tealrose',
                                    line=dict(width=1)))

    trace3 = go.Scatter(x=split_data[1][:, 0],
                        y=split_data[1][:, 1],
                        mode='markers',
                        name=f'Test Data (accuracy={test_score:.3f})',
                        marker=dict(
                            size=10,
                            symbol='triangle-up',
                            color=split_data[3].astype(int),
                            colorscale='tealrose',
                            line=dict(width=1),
                        ))

    hidden_axis = dict(
        ticks='',
        showticklabels=False,
        showgrid=False,
        zeroline=False,
    )

    layout = go.Layout(xaxis=dict(hidden_axis),
                       transition=dict(easing='exp-in-out',
                                       ordering="traces first",
                                       duration=500),
                       yaxis=dict(hidden_axis),
                       plot_bgcolor="#fff",
                       paper_bgcolor="#fff",
                       hovermode='closest',
                       legend=dict(x=0, y=-0.01, orientation="h"),
                       margin=dict(l=0, r=0, t=0, b=0))

    return go.Figure(data=[trace0, trace1, trace2, trace3], layout=layout)
def parse_contents(contents, filename, header, usecols=None):
    """Decode an uploaded CSV/Excel file into a DataFrame.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64 data>"``.
        filename: Original file name; its (case-insensitive) ending selects
            the parser: ``csv`` -> ``pd.read_csv``, ``xls`` / ``xlsx`` ->
            ``pd.read_excel``.
        header: If truthy, return only the column labels.
        usecols: Optional column selection forwarded to the pandas reader.

    Returns:
        ``df.columns`` when ``header`` is truthy, otherwise the DataFrame.

    Raises:
        ValueError: If the file type is not supported. Decoding and parsing
            errors from ``base64`` / pandas propagate unchanged.
    """
    # Only the part after the first comma is payload.
    _content_type, _, content_string = contents.partition(',')
    decoded = io.BytesIO(base64.b64decode(content_string))

    suffix = filename.lower()
    if suffix.endswith('csv'):
        df = pd.read_csv(decoded, usecols=usecols)
    elif suffix.endswith(('xls', 'xlsx')):
        df = pd.read_excel(decoded, usecols=usecols)
    else:
        # Previously this only printed a message and then failed later with
        # an UnboundLocalError on ``df``; fail with the real reason instead.
        raise ValueError(
            f'Unsupported file type for {filename!r}; '
            'expected a .csv, .xls or .xlsx file.')

    return df.columns if header else df
def _split_with_originals(X, y, test_size):
    """Split ``X`` / ``y`` and return the split together with the full arrays.

    Labels are converted to strings for the split only; the third item of the
    result is the unconverted ``y``.

    Returns:
        tuple: ``(train_test_split(...) result, X, y)``.
    """
    return train_test_split(X,
                            y.astype(str),
                            test_size=test_size,
                            random_state=5), X, y


def sampling(**kwargs):
    """Generate one of the built-in toy datasets and split it.

    Keyword Args:
        dataset: ``'moons'``, ``'circles'`` or ``'LS'``.
        sample_size: Number of samples to generate.
        noise: Noise level passed to the moons / circles generators; for
            ``'LS'`` it scales a uniform jitter added to the features.
        test_size: Forwarded to ``train_test_split``.

    Returns:
        tuple: ``(split, X, y)`` where ``split`` is the ``train_test_split``
        result with string labels.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names
            (this used to *return* a ``ValueError`` instance instead).
    """
    name = kwargs['dataset']

    if name == 'moons':
        X, y = datasets.make_moons(n_samples=kwargs['sample_size'],
                                   noise=kwargs['noise'],
                                   random_state=5)
    elif name == 'circles':
        X, y = datasets.make_circles(n_samples=kwargs['sample_size'],
                                     noise=kwargs['noise'],
                                     factor=0.5,
                                     random_state=1)
    elif name == 'LS':
        X, y = datasets.make_classification(n_samples=kwargs['sample_size'],
                                            n_features=2,
                                            n_redundant=0,
                                            n_informative=2,
                                            random_state=2,
                                            n_clusters_per_class=1)
        # make_classification takes no noise argument here; jitter the
        # features with a fixed-seed generator instead.
        rng = np.random.RandomState(2)
        X += kwargs['noise'] * rng.uniform(size=X.shape)
    else:
        raise ValueError(
            f"Unknown dataset {name!r}; expected 'moons', 'circles' or 'LS'.")

    return _split_with_originals(X, y, kwargs['test_size'])


def df_split(**kwargs):
    """Split a DataFrame that has ``x``, ``y`` (features) and ``c`` (label).

    Keyword Args:
        df: The DataFrame to split.
        test_size: Forwarded to ``train_test_split``.

    Returns:
        tuple: ``(split, X, y)`` as for :func:`sampling`.
    """
    _df = kwargs['df']
    return _split_with_originals(_df[['x', 'y']].to_numpy(),
                                 _df['c'].to_numpy(),
                                 kwargs['test_size'])


def data_split(**kwargs):
    """Split already-prepared ``X`` / ``y`` arrays.

    Keyword Args:
        X: Feature array.
        y: Label array (must support ``astype``).
        test_size: Forwarded to ``train_test_split``.

    Returns:
        tuple: ``(split, X, y)`` as for :func:`sampling`.
    """
    return _split_with_originals(kwargs['X'], kwargs['y'], kwargs['test_size'])
def reuse_table(i):
    """Create the preview DataTable shared by the three data tabs.

    Args:
        i: Suffix stored under the ``"id"`` key of the table's
            pattern-matching id (``{"type": "tabs-table", "id": i}``).

    Returns:
        dash_table.DataTable: Table with the columns ``s`` (TRAIN/TEST),
        ``x``, ``y`` and ``c`` (class label).
    """
    column_ids = ["s", "x", "y", "c"]
    # "s" and "c" hold strings; "s" used to be declared numeric because only
    # "c" was excluded from the numeric type.
    text_columns = ("s", "c")
    coordinate_format = dash_table.Format.Format(
        precision=4, scheme=dash_table.Format.Scheme.fixed
    )

    # A separate loop name: the comprehension variable used to shadow the
    # ``i`` parameter.
    columns = [
        {
            "name": col,
            "id": col,
            "type": "text" if col in text_columns else "numeric",
            "format": None if col in text_columns else coordinate_format,
        }
        for col in column_ids
    ]

    return dash_table.DataTable(
        id={"type": "tabs-table", "id": i},
        columns=columns,
        # export_format ='csv',
        fixed_rows={"headers": True},
        style_table={"height": "300px", "overflow": "auto"},
        page_action="none",
        style_cell={
            # Equal share per column ("25.0%" for four columns).
            "width": "{}%".format(100 / len(column_ids)),
            "textOverflow": "ellipsis",
            "overflow": "hidden",
        },
    )
# ------------------------------------------
# Tab 1: generated-dataset controls above their table.
# ------------------------------------------
_tab_1_sliders = dbc.Row(
    [
        dbc.Col(sample_size, width=4),
        dbc.Col(noise_level, width=4),
        dbc.Col(test_size, width=4),
    ],
)

_tab_1_controls = dbc.Card(
    dbc.CardBody([dbc.Row(dataset), html.Br(), _tab_1_sliders]),
    className="mt-3",
)

_tab_1_table = dbc.Card(
    dbc.CardBody([dcc.Loading(reuse_table("t1"))]),
    className="mt-3",
)

tab_1_content = html.Div([_tab_1_controls, _tab_1_table])


# ------------------------------------------
# Tab 2: file upload, column mapping and the resulting table.
# ------------------------------------------
def _uploader_dropdown(label):
    """Return a column holding a caption and an empty dropdown.

    ``label`` is used both as the visible caption and as the ``index`` of
    the dropdown's ``uploader_parameter`` id.
    """
    picker = dcc.Dropdown(id={"type": "uploader_parameter", "index": label})
    return dbc.Col(
        html.Div(
            [html.Strong(label), html.Br(), picker],
            style={"margin-bottom": "15px"},
        )
    )


_upload_area = dcc.Upload(
    id={"type": "uploader_parameter", "index": "uploader"},
    children=html.Div(["Drag and Drop or ", html.A("Select Files")]),
    style={
        "height": "45px",
        "lineHeight": "45px",
        "borderWidth": "1px",
        "borderStyle": "dashed",
        "borderRadius": "5px",
        "textAlign": "center",
        "margin": "10px",
    },
)

_uploader_test_size = dbc.Col(
    html.Div(
        [
            html.Strong("Test Size"),
            html.Br(),
            daq.Slider(
                id={"type": "uploader_parameter", "index": "test_size"},
                max=0.5,
                value=0.25,
                step=0.05,
                marks={tenth / 10: tenth / 10 for tenth in range(1, 5)},
            ),
        ],
        style={"margin-bottom": "15px"},
    )
)

_uploader_columns = [
    _uploader_dropdown("x"),
    _uploader_dropdown("y"),
    _uploader_dropdown("c"),
    _uploader_test_size,
]

tab_2_content = html.Div(
    [
        dbc.Card(dbc.CardBody([_upload_area]), className="mt-3"),
        dbc.Card(
            dcc.Loading([dbc.CardBody(dbc.Row(_uploader_columns))]),
            className="mt-3",
        ),
        dbc.Card(dbc.CardBody([dcc.Loading(reuse_table("t2"))]), className="mt-3"),
    ]
)
# ------------------------------------------
# Tab 3: drawing canvas on the left, toggle button and test-size slider on
# the right, table underneath.
# ------------------------------------------
# This will be switched to the annotation component in the future.
_canvas = dash_canvas.DashCanvas(
    id={"type": "canvas_parameter", "index": "canvas"},
    filename="/assets/canvas_bg.png",
    lineWidth=5,
    goButtonTitle="Generate",
    lineColor="#509188",
    hide_buttons=["zoom", "pan", "line", "pencil", "rectangle", "select"],
)

_canvas_column = dbc.Col(
    [html.Div([_canvas], className="canvas_container")],
    width=8,
)

_canvas_toggle = html.Div(
    dbc.Button("Toggle", id={"type": "canvas_parameter", "index": "toggle"}),
    style={"margin-top": "45px"},
)

_canvas_test_size = html.Div(
    [
        html.Strong("Test Size"),
        html.Br(),
        daq.Slider(
            id={"type": "canvas_parameter", "index": "test_size"},
            max=0.5,
            value=0.25,
            step=0.05,
            marks={tenth / 10: tenth / 10 for tenth in range(1, 5)},
        ),
    ]
)

_canvas_side_column = dbc.Col(
    [
        dbc.Row([_canvas_toggle]),
        html.Br(),
        dbc.Row([_canvas_test_size]),
    ],
    width=4,
)

tab_3_content = html.Div(
    [
        dbc.Card(
            dbc.CardBody([dbc.Row([_canvas_column, _canvas_side_column])]),
            className="mt-3",
        ),
        dbc.Card(dbc.CardBody([dcc.Loading(reuse_table("t3"))]), className="mt-3"),
    ]
)

# ==========================================
# Data-selection panel. Each piece is bound to its own module-level name
# (presumably so callbacks elsewhere can reference it) before being
# assembled into the off-canvas panel.
offcanvas_btn = dbc.Button("SELECT DATA", outline=True, color="primary", size="lg")

tab_1 = dbc.Tab(label="Scikit-learn Datasets")
tab_2 = dbc.Tab(label="Upload Data")
tab_3 = dbc.Tab(label="Hand Painted")
tabs = dbc.Tabs([tab_1, tab_2, tab_3], active_tab="tab-0")

# Created empty here.
offcanvas_content = html.Div()

save_btn = dbc.Button("SAVE", color="success")
_save_card = dbc.Card(
    html.Div(
        [dbc.CardBody([save_btn])],
        className="d-grid gap-2 mx-auto",
    ),
    className="mt-3",
)

offcanvas = dbc.Offcanvas(
    [tabs, offcanvas_content, _save_card],
    placement="end",
    is_open=False,
    title="DATA UPLOAD",
    style={"width": "85%"},
)

uploader_btn = html.Div([offcanvas_btn, offcanvas])
# ==========================================
# Threshold knob (0 to 1) with its reset button.
threshold_btn = dbc.Button("RESET THRESHOLD")

_threshold_knob = daq.Knob(
    id={"type": "svm_parameter", "index": "threshold"},
    min=0,
    max=1,
    value=0.5,
    size=100,
)

threshold = html.Div(
    [html.Strong("Threshold"), html.Br(), _threshold_knob, threshold_btn],
    style={"margin-bottom": "15px"},
)

# ==========================================
# Kernel selector; option keys are the values, option values the labels.
_kernel_labels = {
    "rbf": "Radial basis function (RBF)",
    "linear": "Linear",
    "poly": "Polynomial",
    "sigmoid": "Sigmoid",
}

kernel = html.Div(
    [
        html.Strong("Kernel"),
        html.Br(),
        dcc.Dropdown(
            id={"type": "svm_parameter", "index": "kernel"},
            options=_kernel_labels,
            value="rbf",
            style={"width": "75%"},
        ),
    ],
    style={"margin-bottom": "15px"},
)

# MathJax-enabled Markdown element; created without content here.
latex_formula = dcc.Markdown(mathjax=True)
formula = html.Div(latex_formula)

# Cost is entered with two sliders: a power of ten and a 1-9 coefficient.
_cost_power_slider = daq.Slider(
    id={"type": "svm_parameter", "index": "cost_power"},
    min=-2,
    max=4,
    value=0,
    marks={power: 10**power for power in range(-2, 5)},
)

_cost_coef_slider = daq.Slider(
    id={"type": "svm_parameter", "index": "cost_coef"},
    min=1,
    max=9,
    value=1,
    step=1,
    handleLabel={"label": "COST"},
)

cost = html.Div(
    [html.Strong("Cost (C)"), html.Br(), _cost_power_slider, html.Br(), _cost_coef_slider],
    style={"margin-bottom": "15px"},
)

# Degree slider: 2 to 10, marks at 2, 4, 6 and 8.
_degree_slider = daq.Slider(
    id={"type": "svm_parameter", "index": "degree"},
    min=2,
    max=10,
    value=2,
    step=1,
    marks={deg: deg for deg in range(2, 9, 2)},
)

degree = html.Div(
    [html.Strong("Degree"), html.Br(), _degree_slider],
    style={"margin-bottom": "15px"},
)

# Gamma uses the same power-of-ten plus coefficient pair as cost.
_gamma_power_slider = daq.Slider(
    id={"type": "svm_parameter", "index": "gamma_power"},
    min=-5,
    max=0,
    value=-1,
    marks={power: 10**power for power in range(-5, 1)},
)

_gamma_coef_slider = daq.Slider(
    id={"type": "svm_parameter", "index": "gamma_coef"},
    min=1,
    max=9,
    value=5,
    step=1,
    handleLabel={
        "label": "GAMMA",
        "style": {"height": "15px"},
    },
)

gamma = html.Div(
    [html.Strong("Gamma"), html.Br(), _gamma_power_slider, html.Br(), _gamma_coef_slider],
    style={"margin-bottom": "15px"},
)
# Shrinking on/off switch, rendered as a button group; defaults to enabled.
_shrinking_choices = [
    {"label": "Disable", "value": False},
    {"label": "Enable", "value": True},
]

_shrinking_radio = dbc.RadioItems(
    id={"type": "svm_parameter", "index": "shrinking"},
    className="btn-group",
    inputClassName="btn-check",
    labelClassName="btn btn-outline-primary",
    labelCheckedClassName="active",
    options=_shrinking_choices,
    value=True,
)

shrinking = html.Div([html.Strong("Shrinking"), _shrinking_radio])