diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb new file mode 100644 index 00000000..4820cbe8 --- /dev/null +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -0,0 +1,1929 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Performance Variability Boxplots\n", + "\n", + "Performance variability boxplots provide insight into the runtime distribution and its variability across callsites. Boxplots are calculated to represent the range of the distribution, and outliers (dots) correspond to values that lie beyond 1.5*IQR. Additionally, several statistical measures such as mean, variance, kurtosis, and skewness are also provided." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "from IPython.display import HTML, display\n", + "\n", + "# Hatchet imports\n", + "import hatchet as ht\n", + "from hatchet.util.unify_ensemble import unify_ensemble\n", + "from hatchet.util.boxplot import BoxPlot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will construct a **hatchet.GraphFrame** using a sample dataset in our repository, **caliper-lulesh-json**. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir = os.path.realpath(\"../../../hatchet/tests/data\")\n", + "data_path = os.path.join(data_dir, \"caliper-lulesh-json/lulesh-annotation-profile.json\")\n", + "\n", + "gf_list = []\n", + "for i in range(10):\n", + " gf = ht.GraphFrame.from_caliper(data_path)\n", + " gf.dataset = \"dset{}\".format(i)\n", + " \n", + " gf_list.append(gf)\n", + "\n", + "gf_ensemble = unify_ensemble(gf_list)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namenidtimetime (inc)hatchet_nid
noderankdataset
{'name': 'main', 'type': 'region'}0dset9main0121489.05882425.00
4dset7main0118953.05905595.00
5dset7main0133256.05877613.00
6dset7main0114035.05870933.00
7dset7main0137098.05898724.00
........................
{'name': 'TimeIncrement', 'type': 'region'}1dset3TimeIncrement12212402.0212402.023
2dset3TimeIncrement12171635.0171635.023
3dset3TimeIncrement12323519.0323519.023
dset5TimeIncrement12323519.0323519.023
7dset0TimeIncrement12540.0540.023
\n", + "

1920 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " name nid \\\n", + "node rank dataset \n", + "{'name': 'main', 'type': 'region'} 0 dset9 main 0 \n", + " 4 dset7 main 0 \n", + " 5 dset7 main 0 \n", + " 6 dset7 main 0 \n", + " 7 dset7 main 0 \n", + "... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 1 dset3 TimeIncrement 12 \n", + " 2 dset3 TimeIncrement 12 \n", + " 3 dset3 TimeIncrement 12 \n", + " dset5 TimeIncrement 12 \n", + " 7 dset0 TimeIncrement 12 \n", + "\n", + " time \\\n", + "node rank dataset \n", + "{'name': 'main', 'type': 'region'} 0 dset9 121489.0 \n", + " 4 dset7 118953.0 \n", + " 5 dset7 133256.0 \n", + " 6 dset7 114035.0 \n", + " 7 dset7 137098.0 \n", + "... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 1 dset3 212402.0 \n", + " 2 dset3 171635.0 \n", + " 3 dset3 323519.0 \n", + " dset5 323519.0 \n", + " 7 dset0 540.0 \n", + "\n", + " time (inc) \\\n", + "node rank dataset \n", + "{'name': 'main', 'type': 'region'} 0 dset9 5882425.0 \n", + " 4 dset7 5905595.0 \n", + " 5 dset7 5877613.0 \n", + " 6 dset7 5870933.0 \n", + " 7 dset7 5898724.0 \n", + "... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 1 dset3 212402.0 \n", + " 2 dset3 171635.0 \n", + " 3 dset3 323519.0 \n", + " dset5 323519.0 \n", + " 7 dset0 540.0 \n", + "\n", + " hatchet_nid \n", + "node rank dataset \n", + "{'name': 'main', 'type': 'region'} 0 dset9 0 \n", + " 4 dset7 0 \n", + " 5 dset7 0 \n", + " 6 dset7 0 \n", + " 7 dset7 0 \n", + "... ... 
\n", + "{'name': 'TimeIncrement', 'type': 'region'} 1 dset3 23 \n", + " 2 dset3 23 \n", + " 3 dset3 23 \n", + " dset5 23 \n", + " 7 dset0 23 \n", + "\n", + "[1920 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gf_ensemble.dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, using the **hatchet.GraphFrame**, we can calculate the data required for the performance variability boxplot using an exposed hatchet API, **BoxPlot**.\n", + "\n", + "The interface accepts the following attributes:\n", + "- multi_index_gf - Multi-indexed GraphFrame (required).\n", + "- drop_index_levels - The index levels to drop in the ht.GraphFrame.dataframe to compute the variability (e.g., rank, dataset) (optional).\n", + "- metrics - list of inclusive/exclusive metrics (optional) [default = inc_metrics + exc_metrics]." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Case: Multi-index gf has exactly 2 indexes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nametimetime (inc)nidhatchet_nid
nodedataset
{'name': 'main', 'type': 'region'}dset0main119373.505889901.5000
dset1main119373.505889901.5000
dset2main119373.505889901.5000
dset3main119373.505889901.5000
dset4main119373.505889901.5000
.....................
{'name': 'TimeIncrement', 'type': 'region'}dset5TimeIncrement263538.75263538.751223
dset6TimeIncrement263538.75263538.751223
dset7TimeIncrement263538.75263538.751223
dset8TimeIncrement263538.75263538.751223
dset9TimeIncrement263538.75263538.751223
\n", + "

240 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " name time \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset0 main 119373.50 \n", + " dset1 main 119373.50 \n", + " dset2 main 119373.50 \n", + " dset3 main 119373.50 \n", + " dset4 main 119373.50 \n", + "... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} dset5 TimeIncrement 263538.75 \n", + " dset6 TimeIncrement 263538.75 \n", + " dset7 TimeIncrement 263538.75 \n", + " dset8 TimeIncrement 263538.75 \n", + " dset9 TimeIncrement 263538.75 \n", + "\n", + " time (inc) nid \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset0 5889901.50 0 \n", + " dset1 5889901.50 0 \n", + " dset2 5889901.50 0 \n", + " dset3 5889901.50 0 \n", + " dset4 5889901.50 0 \n", + "... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} dset5 263538.75 12 \n", + " dset6 263538.75 12 \n", + " dset7 263538.75 12 \n", + " dset8 263538.75 12 \n", + " dset9 263538.75 12 \n", + "\n", + " hatchet_nid \n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset0 0 \n", + " dset1 0 \n", + " dset2 0 \n", + " dset3 0 \n", + " dset4 0 \n", + "... ... 
\n", + "{'name': 'TimeIncrement', 'type': 'region'} dset5 23 \n", + " dset6 23 \n", + " dset7 23 \n", + " dset8 23 \n", + " dset9 23 \n", + "\n", + "[240 rows x 5 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gf_ensemble_copy = gf_ensemble.copy()\n", + "gf_ensemble_copy.dataframe = gf_ensemble_copy.dataframe.groupby([\"node\",\"dataset\"]).agg({'name': 'first', 'time': \"mean\", 'time (inc)': \"mean\", 'nid': 'first', 'hatchet_nid': 'first'})\n", + "gf_ensemble_copy.dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "bp = BoxPlot(multi_index_gf=gf_ensemble_copy, metrics=[\"time\", \"time (inc)\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'time': ,\n", + " 'time (inc)': }" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.gf" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
qminmaxmeanvarimbkurtskewnamenidhatchet_nid
node
{'name': 'main', 'type': 'region'}[119373.5, 119373.5, 119373.5, 119373.5, 11937...119373.500119373.500119373.5000.00.0-3.00.0main00
{'name': 'LagrangeLeapFrog', 'type': 'region'}[894.5, 894.5, 894.5, 894.5, 894.5]894.500894.500894.5000.00.0-3.00.0LagrangeLeapFrog11
{'name': 'CalcTimeConstraintsForElems', 'type': 'region'}[7439.0, 7439.0, 7439.0, 7439.0, 7439.0]7439.0007439.0007439.0000.00.0-3.00.0CalcTimeConstraintsForElems92
{'name': 'CalcCourantConstraintForElems', 'type': 'region'}[28915.875, 28915.875, 28915.875, 28915.875, 2...28915.87528915.87528915.8750.00.0-3.00.0CalcCourantConstraintForElems103
{'name': 'CalcHydroConstraintForElems', 'type': 'region'}[10302.875, 10302.875, 10302.875, 10302.875, 1...10302.87510302.87510302.8750.00.0-3.00.0CalcHydroConstraintForElems114
{'name': 'LagrangeElements', 'type': 'region'}[713.875, 713.875, 713.875, 713.875, 713.875]713.875713.875713.8750.00.0-3.00.0LagrangeElements25
{'name': 'ApplyMaterialPropertiesForElems', 'type': 'region'}[14166.625, 14166.625, 14166.625, 14166.625, 1...14166.62514166.62514166.6250.00.0-3.00.0ApplyMaterialPropertiesForElems36
{'name': 'EvalEOSForElems', 'type': 'region'}[254763.75, 254763.75, 254763.75, 254763.75, 2...254763.750254763.750254763.7500.00.0-3.00.0EvalEOSForElems47
{'name': 'CalcEnergyForElems', 'type': 'region'}[287552.875, 287552.875, 287552.875, 287552.87...287552.875287552.875287552.8750.00.0-3.00.0CalcEnergyForElems58
{'name': 'CalcPressureForElems', 'type': 'region'}[177454.875, 177454.875, 177454.875, 177454.87...177454.875177454.875177454.8750.00.0-3.00.0CalcPressureForElems69
{'name': 'CalcSoundSpeedForElems', 'type': 'region'}[8866.25, 8866.25, 8866.25, 8866.25, 8866.25]8866.2508866.2508866.2500.00.0-3.00.0CalcSoundSpeedForElems710
{'name': 'CalcLagrangeElements', 'type': 'region'}[24217.5, 24217.5, 24217.5, 24217.5, 24217.5]24217.50024217.50024217.5000.00.0-3.00.0CalcLagrangeElements1911
{'name': 'CalcKinematicsForElems', 'type': 'region'}[740734.0, 740734.0, 740734.0, 740734.0, 74073...740734.000740734.000740734.0000.00.0-3.00.0CalcKinematicsForElems2012
{'name': 'CalcQForElems', 'type': 'region'}[281926.625, 281926.625, 281926.625, 281926.62...281926.625281926.625281926.6250.00.0-3.00.0CalcQForElems2113
{'name': 'CalcMonotonicQGradientsForElems', 'type': 'region'}[393558.125, 393558.125, 393558.125, 393558.12...393558.125393558.125393558.1250.00.0-3.00.0CalcMonotonicQGradientsForElems2214
{'name': 'CalcMonotonicQRegionForElems', 'type': 'region'}[200505.625, 200505.625, 200505.625, 200505.62...200505.625200505.625200505.6250.00.0-3.00.0CalcMonotonicQRegionForElems2315
{'name': 'UpdateVolumesForElems', 'type': 'region'}[12432.125, 12432.125, 12432.125, 12432.125, 1...12432.12512432.12512432.1250.00.0-3.00.0UpdateVolumesForElems816
{'name': 'LagrangeNodal', 'type': 'region'}[259490.0, 259490.0, 259490.0, 259490.0, 25949...259490.000259490.000259490.0000.00.0-3.00.0LagrangeNodal1317
{'name': 'CalcForceForNodes', 'type': 'region'}[295260.0, 295260.0, 295260.0, 295260.0, 29526...295260.000295260.000295260.0000.00.0-3.00.0CalcForceForNodes1418
{'name': 'CalcVolumeForceForElems', 'type': 'region'}[10871.0, 10871.0, 10871.0, 10871.0, 10871.0]10871.00010871.00010871.0000.00.0-3.00.0CalcVolumeForceForElems1519
{'name': 'CalcHourglassControlForElems', 'type': 'region'}[574309.0, 574309.0, 574309.0, 574309.0, 57430...574309.000574309.000574309.0000.00.0-3.00.0CalcHourglassControlForElems1720
{'name': 'CalcFBHourglassForceForElems', 'type': 'region'}[1197360.375, 1197360.375, 1197360.375, 119736...1197360.3751197360.3751197360.3750.00.0-3.00.0CalcFBHourglassForceForElems1821
{'name': 'IntegrateStressForElems', 'type': 'region'}[725254.375, 725254.375, 725254.375, 725254.37...725254.375725254.375725254.3750.00.0-3.00.0IntegrateStressForElems1622
{'name': 'TimeIncrement', 'type': 'region'}[263538.75, 263538.75, 263538.75, 263538.75, 2...263538.750263538.750263538.7500.00.0-3.00.0TimeIncrement1223
\n", + "
" + ], + "text/plain": [ + " q \\\n", + "node \n", + "{'name': 'main', 'type': 'region'} [119373.5, 119373.5, 119373.5, 119373.5, 11937... \n", + "{'name': 'LagrangeLeapFrog', 'type': 'region'} [894.5, 894.5, 894.5, 894.5, 894.5] \n", + "{'name': 'CalcTimeConstraintsForElems', 'type':... [7439.0, 7439.0, 7439.0, 7439.0, 7439.0] \n", + "{'name': 'CalcCourantConstraintForElems', 'type... [28915.875, 28915.875, 28915.875, 28915.875, 2... \n", + "{'name': 'CalcHydroConstraintForElems', 'type':... [10302.875, 10302.875, 10302.875, 10302.875, 1... \n", + "{'name': 'LagrangeElements', 'type': 'region'} [713.875, 713.875, 713.875, 713.875, 713.875] \n", + "{'name': 'ApplyMaterialPropertiesForElems', 'ty... [14166.625, 14166.625, 14166.625, 14166.625, 1... \n", + "{'name': 'EvalEOSForElems', 'type': 'region'} [254763.75, 254763.75, 254763.75, 254763.75, 2... \n", + "{'name': 'CalcEnergyForElems', 'type': 'region'} [287552.875, 287552.875, 287552.875, 287552.87... \n", + "{'name': 'CalcPressureForElems', 'type': 'region'} [177454.875, 177454.875, 177454.875, 177454.87... \n", + "{'name': 'CalcSoundSpeedForElems', 'type': 'reg... [8866.25, 8866.25, 8866.25, 8866.25, 8866.25] \n", + "{'name': 'CalcLagrangeElements', 'type': 'region'} [24217.5, 24217.5, 24217.5, 24217.5, 24217.5] \n", + "{'name': 'CalcKinematicsForElems', 'type': 'reg... [740734.0, 740734.0, 740734.0, 740734.0, 74073... \n", + "{'name': 'CalcQForElems', 'type': 'region'} [281926.625, 281926.625, 281926.625, 281926.62... \n", + "{'name': 'CalcMonotonicQGradientsForElems', 'ty... [393558.125, 393558.125, 393558.125, 393558.12... \n", + "{'name': 'CalcMonotonicQRegionForElems', 'type'... [200505.625, 200505.625, 200505.625, 200505.62... \n", + "{'name': 'UpdateVolumesForElems', 'type': 'regi... [12432.125, 12432.125, 12432.125, 12432.125, 1... \n", + "{'name': 'LagrangeNodal', 'type': 'region'} [259490.0, 259490.0, 259490.0, 259490.0, 25949... 
\n", + "{'name': 'CalcForceForNodes', 'type': 'region'} [295260.0, 295260.0, 295260.0, 295260.0, 29526... \n", + "{'name': 'CalcVolumeForceForElems', 'type': 're... [10871.0, 10871.0, 10871.0, 10871.0, 10871.0] \n", + "{'name': 'CalcHourglassControlForElems', 'type'... [574309.0, 574309.0, 574309.0, 574309.0, 57430... \n", + "{'name': 'CalcFBHourglassForceForElems', 'type'... [1197360.375, 1197360.375, 1197360.375, 119736... \n", + "{'name': 'IntegrateStressForElems', 'type': 're... [725254.375, 725254.375, 725254.375, 725254.37... \n", + "{'name': 'TimeIncrement', 'type': 'region'} [263538.75, 263538.75, 263538.75, 263538.75, 2... \n", + "\n", + " min max \\\n", + "node \n", + "{'name': 'main', 'type': 'region'} 119373.500 119373.500 \n", + "{'name': 'LagrangeLeapFrog', 'type': 'region'} 894.500 894.500 \n", + "{'name': 'CalcTimeConstraintsForElems', 'type':... 7439.000 7439.000 \n", + "{'name': 'CalcCourantConstraintForElems', 'type... 28915.875 28915.875 \n", + "{'name': 'CalcHydroConstraintForElems', 'type':... 10302.875 10302.875 \n", + "{'name': 'LagrangeElements', 'type': 'region'} 713.875 713.875 \n", + "{'name': 'ApplyMaterialPropertiesForElems', 'ty... 14166.625 14166.625 \n", + "{'name': 'EvalEOSForElems', 'type': 'region'} 254763.750 254763.750 \n", + "{'name': 'CalcEnergyForElems', 'type': 'region'} 287552.875 287552.875 \n", + "{'name': 'CalcPressureForElems', 'type': 'region'} 177454.875 177454.875 \n", + "{'name': 'CalcSoundSpeedForElems', 'type': 'reg... 8866.250 8866.250 \n", + "{'name': 'CalcLagrangeElements', 'type': 'region'} 24217.500 24217.500 \n", + "{'name': 'CalcKinematicsForElems', 'type': 'reg... 740734.000 740734.000 \n", + "{'name': 'CalcQForElems', 'type': 'region'} 281926.625 281926.625 \n", + "{'name': 'CalcMonotonicQGradientsForElems', 'ty... 393558.125 393558.125 \n", + "{'name': 'CalcMonotonicQRegionForElems', 'type'... 200505.625 200505.625 \n", + "{'name': 'UpdateVolumesForElems', 'type': 'regi... 
12432.125 12432.125 \n", + "{'name': 'LagrangeNodal', 'type': 'region'} 259490.000 259490.000 \n", + "{'name': 'CalcForceForNodes', 'type': 'region'} 295260.000 295260.000 \n", + "{'name': 'CalcVolumeForceForElems', 'type': 're... 10871.000 10871.000 \n", + "{'name': 'CalcHourglassControlForElems', 'type'... 574309.000 574309.000 \n", + "{'name': 'CalcFBHourglassForceForElems', 'type'... 1197360.375 1197360.375 \n", + "{'name': 'IntegrateStressForElems', 'type': 're... 725254.375 725254.375 \n", + "{'name': 'TimeIncrement', 'type': 'region'} 263538.750 263538.750 \n", + "\n", + " mean var imb \\\n", + "node \n", + "{'name': 'main', 'type': 'region'} 119373.500 0.0 0.0 \n", + "{'name': 'LagrangeLeapFrog', 'type': 'region'} 894.500 0.0 0.0 \n", + "{'name': 'CalcTimeConstraintsForElems', 'type':... 7439.000 0.0 0.0 \n", + "{'name': 'CalcCourantConstraintForElems', 'type... 28915.875 0.0 0.0 \n", + "{'name': 'CalcHydroConstraintForElems', 'type':... 10302.875 0.0 0.0 \n", + "{'name': 'LagrangeElements', 'type': 'region'} 713.875 0.0 0.0 \n", + "{'name': 'ApplyMaterialPropertiesForElems', 'ty... 14166.625 0.0 0.0 \n", + "{'name': 'EvalEOSForElems', 'type': 'region'} 254763.750 0.0 0.0 \n", + "{'name': 'CalcEnergyForElems', 'type': 'region'} 287552.875 0.0 0.0 \n", + "{'name': 'CalcPressureForElems', 'type': 'region'} 177454.875 0.0 0.0 \n", + "{'name': 'CalcSoundSpeedForElems', 'type': 'reg... 8866.250 0.0 0.0 \n", + "{'name': 'CalcLagrangeElements', 'type': 'region'} 24217.500 0.0 0.0 \n", + "{'name': 'CalcKinematicsForElems', 'type': 'reg... 740734.000 0.0 0.0 \n", + "{'name': 'CalcQForElems', 'type': 'region'} 281926.625 0.0 0.0 \n", + "{'name': 'CalcMonotonicQGradientsForElems', 'ty... 393558.125 0.0 0.0 \n", + "{'name': 'CalcMonotonicQRegionForElems', 'type'... 200505.625 0.0 0.0 \n", + "{'name': 'UpdateVolumesForElems', 'type': 'regi... 
12432.125 0.0 0.0 \n", + "{'name': 'LagrangeNodal', 'type': 'region'} 259490.000 0.0 0.0 \n", + "{'name': 'CalcForceForNodes', 'type': 'region'} 295260.000 0.0 0.0 \n", + "{'name': 'CalcVolumeForceForElems', 'type': 're... 10871.000 0.0 0.0 \n", + "{'name': 'CalcHourglassControlForElems', 'type'... 574309.000 0.0 0.0 \n", + "{'name': 'CalcFBHourglassForceForElems', 'type'... 1197360.375 0.0 0.0 \n", + "{'name': 'IntegrateStressForElems', 'type': 're... 725254.375 0.0 0.0 \n", + "{'name': 'TimeIncrement', 'type': 'region'} 263538.750 0.0 0.0 \n", + "\n", + " kurt skew \\\n", + "node \n", + "{'name': 'main', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'LagrangeLeapFrog', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcTimeConstraintsForElems', 'type':... -3.0 0.0 \n", + "{'name': 'CalcCourantConstraintForElems', 'type... -3.0 0.0 \n", + "{'name': 'CalcHydroConstraintForElems', 'type':... -3.0 0.0 \n", + "{'name': 'LagrangeElements', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'ApplyMaterialPropertiesForElems', 'ty... -3.0 0.0 \n", + "{'name': 'EvalEOSForElems', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcEnergyForElems', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcPressureForElems', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcSoundSpeedForElems', 'type': 'reg... -3.0 0.0 \n", + "{'name': 'CalcLagrangeElements', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcKinematicsForElems', 'type': 'reg... -3.0 0.0 \n", + "{'name': 'CalcQForElems', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcMonotonicQGradientsForElems', 'ty... -3.0 0.0 \n", + "{'name': 'CalcMonotonicQRegionForElems', 'type'... -3.0 0.0 \n", + "{'name': 'UpdateVolumesForElems', 'type': 'regi... -3.0 0.0 \n", + "{'name': 'LagrangeNodal', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcForceForNodes', 'type': 'region'} -3.0 0.0 \n", + "{'name': 'CalcVolumeForceForElems', 'type': 're... -3.0 0.0 \n", + "{'name': 'CalcHourglassControlForElems', 'type'... 
-3.0 0.0 \n", + "{'name': 'CalcFBHourglassForceForElems', 'type'... -3.0 0.0 \n", + "{'name': 'IntegrateStressForElems', 'type': 're... -3.0 0.0 \n", + "{'name': 'TimeIncrement', 'type': 'region'} -3.0 0.0 \n", + "\n", + " name \\\n", + "node \n", + "{'name': 'main', 'type': 'region'} main \n", + "{'name': 'LagrangeLeapFrog', 'type': 'region'} LagrangeLeapFrog \n", + "{'name': 'CalcTimeConstraintsForElems', 'type':... CalcTimeConstraintsForElems \n", + "{'name': 'CalcCourantConstraintForElems', 'type... CalcCourantConstraintForElems \n", + "{'name': 'CalcHydroConstraintForElems', 'type':... CalcHydroConstraintForElems \n", + "{'name': 'LagrangeElements', 'type': 'region'} LagrangeElements \n", + "{'name': 'ApplyMaterialPropertiesForElems', 'ty... ApplyMaterialPropertiesForElems \n", + "{'name': 'EvalEOSForElems', 'type': 'region'} EvalEOSForElems \n", + "{'name': 'CalcEnergyForElems', 'type': 'region'} CalcEnergyForElems \n", + "{'name': 'CalcPressureForElems', 'type': 'region'} CalcPressureForElems \n", + "{'name': 'CalcSoundSpeedForElems', 'type': 'reg... CalcSoundSpeedForElems \n", + "{'name': 'CalcLagrangeElements', 'type': 'region'} CalcLagrangeElements \n", + "{'name': 'CalcKinematicsForElems', 'type': 'reg... CalcKinematicsForElems \n", + "{'name': 'CalcQForElems', 'type': 'region'} CalcQForElems \n", + "{'name': 'CalcMonotonicQGradientsForElems', 'ty... CalcMonotonicQGradientsForElems \n", + "{'name': 'CalcMonotonicQRegionForElems', 'type'... CalcMonotonicQRegionForElems \n", + "{'name': 'UpdateVolumesForElems', 'type': 'regi... UpdateVolumesForElems \n", + "{'name': 'LagrangeNodal', 'type': 'region'} LagrangeNodal \n", + "{'name': 'CalcForceForNodes', 'type': 'region'} CalcForceForNodes \n", + "{'name': 'CalcVolumeForceForElems', 'type': 're... CalcVolumeForceForElems \n", + "{'name': 'CalcHourglassControlForElems', 'type'... CalcHourglassControlForElems \n", + "{'name': 'CalcFBHourglassForceForElems', 'type'... 
CalcFBHourglassForceForElems \n", + "{'name': 'IntegrateStressForElems', 'type': 're... IntegrateStressForElems \n", + "{'name': 'TimeIncrement', 'type': 'region'} TimeIncrement \n", + "\n", + " nid hatchet_nid \n", + "node \n", + "{'name': 'main', 'type': 'region'} 0 0 \n", + "{'name': 'LagrangeLeapFrog', 'type': 'region'} 1 1 \n", + "{'name': 'CalcTimeConstraintsForElems', 'type':... 9 2 \n", + "{'name': 'CalcCourantConstraintForElems', 'type... 10 3 \n", + "{'name': 'CalcHydroConstraintForElems', 'type':... 11 4 \n", + "{'name': 'LagrangeElements', 'type': 'region'} 2 5 \n", + "{'name': 'ApplyMaterialPropertiesForElems', 'ty... 3 6 \n", + "{'name': 'EvalEOSForElems', 'type': 'region'} 4 7 \n", + "{'name': 'CalcEnergyForElems', 'type': 'region'} 5 8 \n", + "{'name': 'CalcPressureForElems', 'type': 'region'} 6 9 \n", + "{'name': 'CalcSoundSpeedForElems', 'type': 'reg... 7 10 \n", + "{'name': 'CalcLagrangeElements', 'type': 'region'} 19 11 \n", + "{'name': 'CalcKinematicsForElems', 'type': 'reg... 20 12 \n", + "{'name': 'CalcQForElems', 'type': 'region'} 21 13 \n", + "{'name': 'CalcMonotonicQGradientsForElems', 'ty... 22 14 \n", + "{'name': 'CalcMonotonicQRegionForElems', 'type'... 23 15 \n", + "{'name': 'UpdateVolumesForElems', 'type': 'regi... 8 16 \n", + "{'name': 'LagrangeNodal', 'type': 'region'} 13 17 \n", + "{'name': 'CalcForceForNodes', 'type': 'region'} 14 18 \n", + "{'name': 'CalcVolumeForceForElems', 'type': 're... 15 19 \n", + "{'name': 'CalcHourglassControlForElems', 'type'... 17 20 \n", + "{'name': 'CalcFBHourglassForceForElems', 'type'... 18 21 \n", + "{'name': 'IntegrateStressForElems', 'type': 're... 16 22 \n", + "{'name': 'TimeIncrement', 'type': 'region'} 12 23 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.gf['time'].dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Case: Multi-index gf has more than 2 indexes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "ename": "Exception", + "evalue": "multi_index_gf contains 3 indexes = ['node', 'rank', 'dataset']. ht.util.BoxPlot is limited to processing GraphFrames with 2 indexes. Please specify the `drop_index` by which BoxPlot API will compute the distribution to avoid ambiguity.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/rv/jkb6mxgd0mbgz87qvq9356zw0000gn/T/ipykernel_66892/3489522065.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# TODO: reword the exception, add an example.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# TODO: drop_index => drop_index_levels.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mbp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBoxPlot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgf_ensemble\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"time\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Work/llnl/hatchet/hatchet/util/boxplot.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, multi_index_gf, drop_index_levels, metrics)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"_hatchet_nid\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"node\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hatchet_nid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop_indexes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_indexes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBoxPlot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_drop_index_level\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdrop_index_levels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 41\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBoxPlot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Work/llnl/hatchet/hatchet/util/boxplot.py\u001b[0m in \u001b[0;36mvalidate_drop_index_level\u001b[0;34m(multi_index_gf, drop_index_levels)\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;31m# Validate if only 2 indexes are provided. 
Else, warn the user to pass `drop_column`.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_index_levels\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 77\u001b[0;31m raise Exception(\n\u001b[0m\u001b[1;32m 78\u001b[0m \u001b[0;34mf\"multi_index_gf contains {len(df_index_levels)} indexes = {df_index_levels}. ht.util.BoxPlot is limited to processing GraphFrames with 2 indexes. Please specify the `drop_index` by which BoxPlot API will compute the distribution to avoid ambiguity.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 79\u001b[0m )\n", + "\u001b[0;31mException\u001b[0m: multi_index_gf contains 3 indexes = ['node', 'rank', 'dataset']. ht.util.BoxPlot is limited to processing GraphFrames with 2 indexes. Please specify the `drop_index` by which BoxPlot API will compute the distribution to avoid ambiguity." + ] + } + ], + "source": [ + "# TODO: reword the exception, add an example. \n", + "# TODO: drop_index => drop_index_levels.\n", + "bp = BoxPlot(multi_index_gf=gf_ensemble, metrics=[\"time\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Case: Metric not found in dataframe." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "Exception", + "evalue": "time (incx) not found in the gf.dataframe.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/rv/jkb6mxgd0mbgz87qvq9356zw0000gn/T/ipykernel_66892/1974429012.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBoxPlot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgf_ensemble\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"time (incx)\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdrop_index_levels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"rank\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Work/llnl/hatchet/hatchet/util/boxplot.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, multi_index_gf, drop_index_levels, metrics)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop_indexes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_indexes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBoxPlot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_drop_index_level\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdrop_index_levels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mBoxPlot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg_columns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"_hatchet_nid\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_indexes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Work/llnl/hatchet/hatchet/util/boxplot.py\u001b[0m in \u001b[0;36mvalidate_metrics\u001b[0;34m(multi_index_gf, metrics)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmetric\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmetric\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmulti_index_gf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{metric} not found in the gf.dataframe.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mException\u001b[0m: time (incx) not found in the 
gf.dataframe." + ] + } + ], + "source": [ + "bp = BoxPlot(multi_index_gf=gf_ensemble, metrics=[\"time (incx)\"], drop_index_levels=[\"rank\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Case: Drop index by 'rank'." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: make `drop_index` a list." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jarus/Work/llnl/hatchet/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3441: PerformanceWarning: indexing past lexsort depth may impact performance.\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n" + ] + } + ], + "source": [ + "bp = BoxPlot(multi_index_gf=gf_ensemble, drop_index_levels=[\"rank\"], metrics=[\"time\", \"time (inc)\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **BoxPlot** API calculates the results and stores them as GraphFrames in a dictionary (i.e., `tgt` and `bkg`). " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
qminmaxmeanvarimbkurtskewhatchet_nidnidname
nodedataset
{'name': 'main', 'type': 'region'}dset9[105528.0, 113072.25, 116494.0, 124430.75, 137...105528.0137098.0119373.501.044980e+080.148479-0.9421850.54367300main
dset7[105528.0, 113072.25, 116494.0, 124430.75, 137...105528.0137098.0119373.501.044980e+080.148479-0.9421850.54367300main
dset1[105528.0, 113072.25, 116494.0, 124430.75, 137...105528.0137098.0119373.501.044980e+080.148479-0.9421850.54367300main
dset3[105528.0, 113072.25, 116494.0, 124430.75, 137...105528.0137098.0119373.501.044980e+080.148479-0.9421850.54367300main
dset6[105528.0, 113072.25, 116494.0, 124430.75, 137...105528.0137098.0119373.501.044980e+080.148479-0.9421850.54367300main
.......................................
{'name': 'TimeIncrement', 'type': 'region'}dset8[540.0, 202210.25, 269561.0, 361367.0, 423809.0]540.0423809.0263538.751.775294e+100.608147-0.558767-0.5638932312TimeIncrement
dset9[540.0, 202210.25, 269561.0, 361367.0, 423809.0]540.0423809.0263538.751.775294e+100.608147-0.558767-0.5638932312TimeIncrement
dset2[540.0, 202210.25, 269561.0, 361367.0, 423809.0]540.0423809.0263538.751.775294e+100.608147-0.558767-0.5638932312TimeIncrement
dset3[540.0, 202210.25, 269561.0, 361367.0, 423809.0]540.0423809.0263538.751.775294e+100.608147-0.558767-0.5638932312TimeIncrement
dset4[540.0, 202210.25, 269561.0, 361367.0, 423809.0]540.0423809.0263538.751.775294e+100.608147-0.558767-0.5638932312TimeIncrement
\n", + "

240 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " q \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset9 [105528.0, 113072.25, 116494.0, 124430.75, 137... \n", + " dset7 [105528.0, 113072.25, 116494.0, 124430.75, 137... \n", + " dset1 [105528.0, 113072.25, 116494.0, 124430.75, 137... \n", + " dset3 [105528.0, 113072.25, 116494.0, 124430.75, 137... \n", + " dset6 [105528.0, 113072.25, 116494.0, 124430.75, 137... \n", + "... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} dset8 [540.0, 202210.25, 269561.0, 361367.0, 423809.0] \n", + " dset9 [540.0, 202210.25, 269561.0, 361367.0, 423809.0] \n", + " dset2 [540.0, 202210.25, 269561.0, 361367.0, 423809.0] \n", + " dset3 [540.0, 202210.25, 269561.0, 361367.0, 423809.0] \n", + " dset4 [540.0, 202210.25, 269561.0, 361367.0, 423809.0] \n", + "\n", + " min max \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset9 105528.0 137098.0 \n", + " dset7 105528.0 137098.0 \n", + " dset1 105528.0 137098.0 \n", + " dset3 105528.0 137098.0 \n", + " dset6 105528.0 137098.0 \n", + "... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} dset8 540.0 423809.0 \n", + " dset9 540.0 423809.0 \n", + " dset2 540.0 423809.0 \n", + " dset3 540.0 423809.0 \n", + " dset4 540.0 423809.0 \n", + "\n", + " mean var \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset9 119373.50 1.044980e+08 \n", + " dset7 119373.50 1.044980e+08 \n", + " dset1 119373.50 1.044980e+08 \n", + " dset3 119373.50 1.044980e+08 \n", + " dset6 119373.50 1.044980e+08 \n", + "... ... ... 
\n", + "{'name': 'TimeIncrement', 'type': 'region'} dset8 263538.75 1.775294e+10 \n", + " dset9 263538.75 1.775294e+10 \n", + " dset2 263538.75 1.775294e+10 \n", + " dset3 263538.75 1.775294e+10 \n", + " dset4 263538.75 1.775294e+10 \n", + "\n", + " imb kurt \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset9 0.148479 -0.942185 \n", + " dset7 0.148479 -0.942185 \n", + " dset1 0.148479 -0.942185 \n", + " dset3 0.148479 -0.942185 \n", + " dset6 0.148479 -0.942185 \n", + "... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} dset8 0.608147 -0.558767 \n", + " dset9 0.608147 -0.558767 \n", + " dset2 0.608147 -0.558767 \n", + " dset3 0.608147 -0.558767 \n", + " dset4 0.608147 -0.558767 \n", + "\n", + " skew hatchet_nid nid \\\n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset9 0.543673 0 0 \n", + " dset7 0.543673 0 0 \n", + " dset1 0.543673 0 0 \n", + " dset3 0.543673 0 0 \n", + " dset6 0.543673 0 0 \n", + "... ... ... .. \n", + "{'name': 'TimeIncrement', 'type': 'region'} dset8 -0.563893 23 12 \n", + " dset9 -0.563893 23 12 \n", + " dset2 -0.563893 23 12 \n", + " dset3 -0.563893 23 12 \n", + " dset4 -0.563893 23 12 \n", + "\n", + " name \n", + "node dataset \n", + "{'name': 'main', 'type': 'region'} dset9 main \n", + " dset7 main \n", + " dset1 main \n", + " dset3 main \n", + " dset6 main \n", + "... ... 
\n", + "{'name': 'TimeIncrement', 'type': 'region'} dset8 TimeIncrement \n", + " dset9 TimeIncrement \n", + " dset2 TimeIncrement \n", + " dset3 TimeIncrement \n", + " dset4 TimeIncrement \n", + "\n", + "[240 rows x 11 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.gf['time'].dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "MultiIndex: 240 entries, (Node({'name': 'main', 'type': 'region'}), 'dset9') to (Node({'name': 'TimeIncrement', 'type': 'region'}), 'dset4')\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 q 240 non-null object \n", + " 1 min 240 non-null float64\n", + " 2 max 240 non-null float64\n", + " 3 mean 240 non-null float64\n", + " 4 var 240 non-null float64\n", + " 5 imb 240 non-null float64\n", + " 6 kurt 240 non-null float64\n", + " 7 skew 240 non-null float64\n", + " 8 hatchet_nid 240 non-null object \n", + " 9 nid 240 non-null object \n", + " 10 name 240 non-null object \n", + "dtypes: float64(7), object(4)\n", + "memory usage: 22.3+ KB\n" + ] + } + ], + "source": [ + "bp.gf['time'].dataframe.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Case: Drop index by 'dataset'." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jarus/Work/llnl/hatchet/venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3441: PerformanceWarning: indexing past lexsort depth may impact performance.\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n" + ] + } + ], + "source": [ + "bp = BoxPlot(multi_index_gf=gf_ensemble, metrics=[\"time\"], drop_index_levels=[\"dataset\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
qminmaxmeanvarimbkurtskewhatchet_nidtime (inc)nidname
noderank
{'name': 'main', 'type': 'region'}0[121489.0, 121489.0, 121489.0, 121489.0, 12148...121489.0121489.0121489.00.00.0-3.00.005882425.00main
4[118953.0, 118953.0, 118953.0, 118953.0, 11895...118953.0118953.0118953.00.00.0-3.00.005905595.00main
5[133256.0, 133256.0, 133256.0, 133256.0, 13325...133256.0133256.0133256.00.00.0-3.00.005877613.00main
6[114035.0, 114035.0, 114035.0, 114035.0, 11403...114035.0114035.0114035.00.00.0-3.00.005870933.00main
7[137098.0, 137098.0, 137098.0, 137098.0, 13709...137098.0137098.0137098.00.00.0-3.00.005898724.00main
..........................................
{'name': 'TimeIncrement', 'type': 'region'}2[171635.0, 171635.0, 171635.0, 171635.0, 17163...171635.0171635.0171635.00.00.0-3.00.023171635.012TimeIncrement
1[212402.0, 212402.0, 212402.0, 212402.0, 21240...212402.0212402.0212402.00.00.0-3.00.023212402.012TimeIncrement
0[418469.0, 418469.0, 418469.0, 418469.0, 41846...418469.0418469.0418469.00.00.0-3.00.023418469.012TimeIncrement
7[540.0, 540.0, 540.0, 540.0, 540.0]540.0540.0540.00.00.0-3.00.023540.012TimeIncrement
6[423809.0, 423809.0, 423809.0, 423809.0, 42380...423809.0423809.0423809.00.00.0-3.00.023423809.012TimeIncrement
\n", + "

192 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " q \\\n", + "node rank \n", + "{'name': 'main', 'type': 'region'} 0 [121489.0, 121489.0, 121489.0, 121489.0, 12148... \n", + " 4 [118953.0, 118953.0, 118953.0, 118953.0, 11895... \n", + " 5 [133256.0, 133256.0, 133256.0, 133256.0, 13325... \n", + " 6 [114035.0, 114035.0, 114035.0, 114035.0, 11403... \n", + " 7 [137098.0, 137098.0, 137098.0, 137098.0, 13709... \n", + "... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 2 [171635.0, 171635.0, 171635.0, 171635.0, 17163... \n", + " 1 [212402.0, 212402.0, 212402.0, 212402.0, 21240... \n", + " 0 [418469.0, 418469.0, 418469.0, 418469.0, 41846... \n", + " 7 [540.0, 540.0, 540.0, 540.0, 540.0] \n", + " 6 [423809.0, 423809.0, 423809.0, 423809.0, 42380... \n", + "\n", + " min max \\\n", + "node rank \n", + "{'name': 'main', 'type': 'region'} 0 121489.0 121489.0 \n", + " 4 118953.0 118953.0 \n", + " 5 133256.0 133256.0 \n", + " 6 114035.0 114035.0 \n", + " 7 137098.0 137098.0 \n", + "... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 2 171635.0 171635.0 \n", + " 1 212402.0 212402.0 \n", + " 0 418469.0 418469.0 \n", + " 7 540.0 540.0 \n", + " 6 423809.0 423809.0 \n", + "\n", + " mean var imb kurt \\\n", + "node rank \n", + "{'name': 'main', 'type': 'region'} 0 121489.0 0.0 0.0 -3.0 \n", + " 4 118953.0 0.0 0.0 -3.0 \n", + " 5 133256.0 0.0 0.0 -3.0 \n", + " 6 114035.0 0.0 0.0 -3.0 \n", + " 7 137098.0 0.0 0.0 -3.0 \n", + "... ... ... ... ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 2 171635.0 0.0 0.0 -3.0 \n", + " 1 212402.0 0.0 0.0 -3.0 \n", + " 0 418469.0 0.0 0.0 -3.0 \n", + " 7 540.0 0.0 0.0 -3.0 \n", + " 6 423809.0 0.0 0.0 -3.0 \n", + "\n", + " skew hatchet_nid time (inc) \\\n", + "node rank \n", + "{'name': 'main', 'type': 'region'} 0 0.0 0 5882425.0 \n", + " 4 0.0 0 5905595.0 \n", + " 5 0.0 0 5877613.0 \n", + " 6 0.0 0 5870933.0 \n", + " 7 0.0 0 5898724.0 \n", + "... ... ... ... 
\n", + "{'name': 'TimeIncrement', 'type': 'region'} 2 0.0 23 171635.0 \n", + " 1 0.0 23 212402.0 \n", + " 0 0.0 23 418469.0 \n", + " 7 0.0 23 540.0 \n", + " 6 0.0 23 423809.0 \n", + "\n", + " nid name \n", + "node rank \n", + "{'name': 'main', 'type': 'region'} 0 0 main \n", + " 4 0 main \n", + " 5 0 main \n", + " 6 0 main \n", + " 7 0 main \n", + "... .. ... \n", + "{'name': 'TimeIncrement', 'type': 'region'} 2 12 TimeIncrement \n", + " 1 12 TimeIncrement \n", + " 0 12 TimeIncrement \n", + " 7 12 TimeIncrement \n", + " 6 12 TimeIncrement \n", + "\n", + "[192 rows x 12 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.gf['time'].dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the **roundtrip** interface, we can then visualize the computed boxplot information. Below, we load the roundtrip interface that allows users to visualize plots on jupyter notebook cells directly. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bp_json = bp.to_json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bp_json" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hatchet-venv", + "language": "python", + "name": "hatchet-venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/examples/tutorial/roundtrip-demo.ipynb b/docs/examples/tutorial/roundtrip-demo.ipynb index 144879b9..51083824 100644 --- a/docs/examples/tutorial/roundtrip-demo.ipynb +++ b/docs/examples/tutorial/roundtrip-demo.ipynb @@ -15,9 +15,347 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "var Roundtrip_Obj = {};\n", + "var refresh_cycle = false;\n", + "var clicked_cell = null;\n", + "var cached_cells = Jupyter.notebook.get_cell_elements();\n", + "\n", + "/**\n", + " * @name unindentPyCode\n", + " * @description Removes leading indentations from a python code string.\n", + " * \n", + " * @param {string} code Python code in string form\n", + " * @returns Passed code string but with no leading indentations\n", + " */\n", + "function unindentPyCode(code){\n", + " let uicode = code.split('\\n');\n", + " let indent = 0;\n", + "\n", + " uicode.forEach((l,i, arr)=>{\n", + " if(i == 0){\n", + " indent = l.search(/\\S/);\n", + " }\n", + " arr[i] = l.slice(indent);\n", + " })\n", + " uicode = uicode.join('\\n');\n", + " return uicode;\n", + "}\n", + "\n", + "/**\n", + " * @name buildPythonAssignment\n", + " * @description Builds up a python code string which assigns javascript data back into jypyter notebook namespace\n", + " * \n", + " * @param {string} val This is data assigned back to the python code\n", + " * @param {string} py_var This is the variable into which val is assigned\n", + " * @param {string} converter This is a definition of a python function which translates data back to the desired format\n", + " * @returns The python code to be run in the jupyter shell\n", + " */\n", + "function buildPythonAssignment(val, py_var, converter){\n", + " // console.log(val, py_var, converter);\n", + " var holder = `'${val}'`;\n", + " var code = `${unindentPyCode(converter.code)}`\n", + " code += `\\ntmp = ${holder}`;\n", + " code += `\\n${py_var} = ${converter.name}(tmp)`\n", + "\n", + " return code\n", + "}\n", + "\n", + "/**\n", + " * @name manageNewCell\n", + " * \n", + " * @description Increments all two way bound cell ids by the number of new cells which proceed them. \n", + " * Ex. 
Adding one cell at position 2 will increment a bound cell at position 3 from 3->4. \n", + " * \n", + " * @param {array} newCells A list of our current cells in the notebook to be compared against cached cells\n", + " * @param {} obj The current roundtrip object containing all data bindings\n", + " */\n", + "function manageNewCell(newCells, obj){\n", + " let newIds = [];\n", + "\n", + " Object.keys(newCells).forEach(function(i){\n", + " if(!Object.values(cached_cells).includes(newCells[i]) && !isNaN(i)){\n", + " newIds.push(i);\n", + " }\n", + " });\n", + "\n", + " //increment all bindings past each new id\n", + " for(let js_var in obj){\n", + " for(let id of newIds){\n", + " for(let key in obj[js_var][\"two_way\"]){\n", + " obj[js_var][\"two_way\"][key].forEach((two_way_id, i) => {\n", + " if(two_way_id > id){\n", + " obj[js_var][\"two_way\"][key][i] += 1;\n", + " }\n", + " });\n", + " }\n", + " } \n", + " }\n", + "\n", + " cached_cells = newCells;\n", + "}\n", + "\n", + "function manageDeletedCell(newCells, obj){\n", + " let deletedId = null;\n", + " \n", + " for(i of Object.keys(cachedCells)){\n", + " if (cached_cells[i] !== newCells[i]){\n", + " deletedId = i;\n", + " break;\n", + " }\n", + " }\n", + "\n", + "}\n", + "\n", + "\n", + "function bindClickDetectToCells(){\n", + " let cells = Jupyter.notebook.get_cell_elements();\n", + "\n", + " for(let i in Object.keys(cells)){\n", + " let cell = cells[i];\n", + "\n", + " if(cell !== undefined){\n", + " cell.addEventListener('mousedown', () => {\n", + " clicked_cell = i;\n", + " }, true)\n", + " }\n", + " }\n", + "}\n", + "\n", + "bindClickDetectToCells();\n", + "\n", + "/**\n", + " * @name RT_Handler\n", + " * @description A wrapper for our roundtrip object. It is called as a proxy for the\n", + " * roundtrip object defined above. This enables us to define custom call backs for\n", + " * gets and sets on the roundtrip object. 
The custom set handles necessary data conversion,\n", + " * the registering of two-way bound variables and automatic updating of watched cells. The get\n", + " * allows users to interact with the underlying object without worrying about the proxy.\n", + " */\n", + "var RT_Handler = {\n", + " set(obj, prop, value){\n", + " //Do cell housekeeping\n", + "\n", + "\n", + " //Initial pass of value into roundtrip object\n", + " // from python code; there may be multiple different\n", + " // visualizations of the same type we need to catch\n", + " if (typeof value === 'object' && value.hasOwnProperty('origin') && value.origin == 'INIT'){\n", + " \n", + " /**\n", + " * In this code block we need to check if there is already a \n", + " * an array of id's which are two way bound already defined and \n", + " * add to it or remove from it\n", + " */\n", + " let ida = Jupyter.notebook.get_selected_index()-1;\n", + " value.id = ida;\n", + " let new_val = value;\n", + "\n", + " // Block updating bindings while jupyter is running\n", + " if(refresh_cycle){\n", + " new_val = obj[prop];\n", + " new_val.data = value.data;\n", + " return Reflect.set(obj, prop, new_val);\n", + " }\n", + "\n", + " /**\n", + " * The broad case where we are updating bindings \n", + " * on existing data\n", + " */\n", + " if(obj[prop] != undefined){\n", + " new_val = obj[prop];\n", + " new_val.data = value.data;\n", + " new_val.converter = value.converter;\n", + "\n", + " // If there is no two way array, create one\n", + " // Else push on our new id\n", + " if(value.two_way === true){\n", + " if(!Object.keys(new_val.two_way).includes(value['python_var'])){\n", + " new_val.two_way[value['python_var']] = [];\n", + " }\n", + "\n", + " let pybinding = new_val.two_way[value['python_var']];\n", + "\n", + " if(!pybinding.includes(value.id)){\n", + " pybinding.push(value.id);\n", + " }\n", + "\n", + " }\n", + "\n", + " //Deregister a cell id from being two-way bound now\n", + " else if(value.two_way === false && 
Object.keys(new_val.two_way).includes(value['python_var'])){\n", + " let pybinding = new_val.two_way[value['python_var']];\n", + " const index = pybinding.indexOf(value.id);\n", + " \n", + " if (index > -1) {\n", + " pybinding.splice(index, 1);\n", + " }\n", + " }\n", + " }\n", + "\n", + " //Initalize a new two-way object if\n", + " // one did not exist\n", + " else{\n", + " if(new_val.two_way == true){\n", + " new_val.two_way = {};\n", + " new_val.two_way[value['python_var']] = [value.id];\n", + " }\n", + " else{\n", + " new_val.two_way = {};\n", + " }\n", + " delete new_val.id;\n", + " delete new_val.from_py;\n", + " delete new_val.python_var;\n", + " }\n", + "\n", + " return Reflect.set(obj, prop, new_val);\n", + " }\n", + " //Assignment from javascript code\n", + " else {\n", + " // TODO: make the py/js data identification object a\n", + " // formal class\n", + " if(obj[prop] === undefined){\n", + " obj[prop] = {\n", + " two_way: {},\n", + " origin: \"JS\",\n", + " data: null,\n", + " python_var: \"\",\n", + " converter: null,\n", + " type: typeof(value)\n", + " }\n", + " }\n", + "\n", + " var execable_cells = [];\n", + " let origin = 'STANDARD';\n", + " let python_var = '';\n", + "\n", + " if (typeof value === 'object' && \n", + " value.hasOwnProperty('origin') && \n", + " value.origin == 'PYASSIGN'){\n", + "\n", + " origin = value.origin;\n", + " python_var = value.python_var;\n", + " value = value.data;\n", + " }\n", + "\n", + " //TODO: Replace with imported, webpacked D3\n", + " require(['https://d3js.org/d3.v4.min.js'], function(d3) {\n", + "\n", + " // When 2 way bound this calls automatically when something changes\n", + " if (obj[prop] !== undefined && Object.keys(obj[prop][\"two_way\"]).length > 0){\n", + "\n", + " let current_cell = Number(clicked_cell);\n", + " let py_var = '';\n", + "\n", + " //ust set the data without updating if our current cell is not two way bound\n", + " if(origin == 'STANDARD'){\n", + " let found = false;\n", + " for(let key 
in obj[prop][\"two_way\"]){\n", + " if (obj[prop][\"two_way\"][key].includes(current_cell)){\n", + " found = true;\n", + " py_var = key;\n", + " }\n", + " }\n", + "\n", + " if(!found){\n", + " return Reflect.set(obj[prop], \"data\", value);\n", + " }\n", + " }\n", + "\n", + "\n", + " if(origin == 'PYASSIGN'){\n", + " py_var = python_var;\n", + " }\n", + "\n", + "\n", + " /**\n", + " * We now have a list of registered cells we can execute.\n", + " * So we look through our javascript variables to see if they\n", + " * are bound to the same py variable as our current assignment\n", + " * TODO: Make this list update when cells are moved up or down\n", + " */\n", + "\n", + " for(let js_var in obj){\n", + " let boundpyvars = Object.keys(obj[js_var][\"two_way\"]);\n", + "\n", + " if(boundpyvars.includes(py_var)){\n", + " let clls = obj[js_var][\"two_way\"][py_var].filter(x => x != current_cell );\n", + " execable_cells = execable_cells.concat(clls);\n", + " }\n", + " }\n", + "\n", + " if(origin == 'STANDARD'){\n", + " // TODO:THROW AN ERROR IF CONVERTER == NONE\n", + " const code = buildPythonAssignment(value, py_var, obj[prop][\"converter\"]);\n", + " \n", + " //TODO: Turn this into a function that manages error reporting and printing\n", + " Jupyter.notebook.kernel.execute(code, { \n", + " shell:{\n", + " reply: function(r){\n", + " //consider putting this in a reserved jupyter variable\n", + " if(r.content.status == 'error'){\n", + " console.error(`${r.content.ename} in JS->Python coversion:\\n ${r.content.evalue}`)\n", + " }\n", + " }\n", + " }\n", + " });\n", + " }\n", + "\n", + " refresh_cycle = true;\n", + " Jupyter.notebook.execute_cells(execable_cells);\n", + "\n", + " /**\n", + " * Test every half second to see if some of the\n", + " * jupyter cells are still running. 
Avoids a race condition\n", + " * where incorrect ids were stored in our roundtrip object.\n", + " */\n", + " const test_running = function(){\n", + " let runtest = d3.selectAll(\".running\");\n", + " if(runtest.empty()){\n", + " refresh_cycle = false;\n", + " return;\n", + " }\n", + " else{\n", + " setTimeout(test_running, 500);\n", + " }\n", + " }\n", + "\n", + " test_running();\n", + " }\n", + "\n", + " });\n", + " } \n", + "\n", + " return Reflect.set(obj[prop], \"data\", value);\n", + " },\n", + " get(obj, prop, reciever){\n", + " let ret = obj[prop].data\n", + " return ret; \n", + " }\n", + "}\n", + "\n", + "window.Roundtrip = new Proxy(Roundtrip_Obj, RT_Handler);\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Your Roundtrip visualizations may not load properly. Roundtrip only supports Python Version 3.x.x. You are using Python v2.7.18\n" + ] + } + ], "source": [ "import os, sys\n", "from IPython.display import HTML, display\n", @@ -497,12 +835,12 @@ "kernelspec": { "display_name": "Python 3", "language": "python", - "name": "python3" + "name": "py2_env" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", diff --git a/hatchet/tests/boxplot.py b/hatchet/tests/boxplot.py new file mode 100644 index 00000000..ef514cce --- /dev/null +++ b/hatchet/tests/boxplot.py @@ -0,0 +1,99 @@ +# Copyright 2017-2022 Lawrence Livermore National Security, LLC and other +# Hatchet Project Developers. See the top-level LICENSE file for details. 
def test_gf_format(calc_pi_hpct_db):
    """BoxPlot exposes one well-formed GraphFrame per metric of the input.

    Checks, for every inclusive and exclusive metric of the source
    GraphFrame, that ``bp.gf`` holds a GraphFrame whose dataframe carries
    the boxplot statistic columns and is indexed by ``node`` only.
    """
    gf = ht.GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    bp = BoxPlot(multi_index_gf=gf)

    metrics = gf.inc_metrics + gf.exc_metrics

    # Check if the format of target is correct.
    assert all(metric in bp.gf for metric in metrics)
    assert all(isinstance(bp.gf[metric], ht.GraphFrame) for metric in metrics)
    assert all(isinstance(bp.gf[metric].dataframe, pd.DataFrame) for metric in metrics)
    assert all(isinstance(bp.gf[metric].graph, ht.graph.Graph) for metric in metrics)

    # Check if the required columns are present.
    # The previous assertion compared `list.sort()` return values, i.e.
    # `None == None`, and therefore could never fail. Test membership instead.
    # "ocat" and "ometric" are intentionally not required: BoxPlot still marks
    # the outlier columns as TODO and does not emit them.
    required = {
        "name",
        "q",
        "min",
        "max",
        "mean",
        "var",
        "imb",
        "kurt",
        "skew",
    }
    for metric in metrics:
        missing = required.difference(bp.gf[metric].dataframe.columns)
        assert not missing, "{}: missing boxplot columns {}".format(
            metric, sorted(missing)
        )

    assert all(
        len(list(bp.gf[metric].dataframe.index.names)) == 1 for metric in metrics
    )
    assert all(
        list(bp.gf[metric].dataframe.index.names) == ["node"] for metric in metrics
    )


def test_output_dtypes(calc_pi_hpct_db):
    """Descriptive columns are stored as objects, statistics as float64."""
    gf = ht.GraphFrame.from_hpctoolkit(str(calc_pi_hpct_db))
    metrics = ["time"]
    bp = BoxPlot(multi_index_gf=gf, drop_index_levels=["rank"], metrics=metrics)

    object_dtype = ["name", "nid", "q"]
    float_dtype = ["min", "max", "mean", "var", "imb", "kurt", "skew"]

    dtypes = bp.gf["time"].dataframe.dtypes
    assert all(dtypes[col] == "object" for col in object_dtype)
    assert all(dtypes[col] == "float64" for col in float_dtype)
class BoxPlot:
    """Per-callsite runtime distribution ("boxplot") statistics of a GraphFrame.

    The dataframe of the input GraphFrame must have "node" among its index
    levels. The rows of every callsite are reduced, per metric, to quartiles,
    min/max, mean, variance, imbalance, kurtosis and skewness.

    Attributes set by the constructor:
        multi_index_gf: copy of the input GraphFrame whose dataframe has its
            index reset and a "_hatchet_nid" column added.
        drop_indexes: index levels the distribution is computed across.
        output_indexes: index levels (other than "node") kept in the output.
        metrics: metric columns the statistics are computed for.
        agg_columns: columns used to group rows ("_hatchet_nid" followed by
            ``output_indexes``).
        output_columns: remaining non-metric columns carried into the output.
        boxplot_df_dict: raw statistics as returned by ``BoxPlot.compute``.
        gf: dict mapping each metric to a GraphFrame holding its statistics.
    """

    def __init__(
        self,
        multi_index_gf,
        drop_index_levels=None,
        metrics=None,
    ):
        """
        Boxplot class computes the runtime distributions of a multi-indexed GraphFrame.

        Arguments:
            multi_index_gf: (ht.GraphFrame) Target GraphFrame.
            drop_index_levels (Optional): (list) Index levels across which the
                distribution is computed. When omitted and the dataframe has
                exactly two index levels, the non-"node" level is used.
            metrics (Optional): (list) List of metrics to compute. When
                omitted, all inclusive and exclusive metrics are used.

        Raises:
            TypeError: if an argument has the wrong type.
            ValueError: if the index levels or metrics fail validation.

        Return: None
        """
        if not isinstance(multi_index_gf, ht.GraphFrame):
            raise TypeError("`multi_index_gf` must be a hatchet.GraphFrame.")

        # `None` replaces the former mutable `[]` defaults; passing a list
        # (including an empty one) behaves exactly as before.
        drop_index_levels = [] if drop_index_levels is None else drop_index_levels
        metrics = [] if metrics is None else metrics
        if not isinstance(drop_index_levels, list):
            raise TypeError("`drop_index_levels` must be a list.")
        if not isinstance(metrics, list):
            raise TypeError("`metrics` must be a list.")

        # Validate against the caller's GraphFrame (its index is still intact)
        # before touching the "node" column, so a missing "node" level is
        # reported with the descriptive message instead of a bare KeyError.
        self.drop_indexes, self.output_indexes = BoxPlot.validate_drop_index_level(
            multi_index_gf, drop_index_levels
        )
        self.metrics = BoxPlot.validate_metrics(multi_index_gf, metrics)

        # Reset the indexes in the dataframe (on a copy of the GraphFrame).
        self.multi_index_gf = multi_index_gf.copy()
        self.multi_index_gf.dataframe = self.multi_index_gf.dataframe.reset_index()
        self.multi_index_gf.dataframe["_hatchet_nid"] = self.multi_index_gf.dataframe[
            "node"
        ].apply(lambda node: node._hatchet_nid)

        self.agg_columns = ["_hatchet_nid"] + self.output_indexes

        # Everything that is neither a grouping column nor a metric is carried
        # through. Dataframe order is kept so the column order is deterministic.
        excluded = set(self.agg_columns).union(self.metrics)
        self.output_columns = [
            column
            for column in self.multi_index_gf.dataframe.columns
            if column not in excluded
        ]

        # Compute the boxplot dictionary keyed by "index" and valued as a dataframe.
        self.boxplot_df_dict = BoxPlot.compute(
            multi_index_df=self.multi_index_gf.dataframe,
            groupby=self.agg_columns,
            cols=self.output_columns,
            metrics=self.metrics,
        )

        # Convert it to a GraphFrame.
        self.gf = self.to_gf()

    @staticmethod
    def validate_drop_index_level(
        multi_index_gf: "ht.GraphFrame", drop_index_levels: list
    ):
        """
        Validate the index levels to aggregate over.

        Arguments:
            multi_index_gf: (ht.GraphFrame) GraphFrame whose dataframe index
                is inspected.
            drop_index_levels: (list) Requested levels; may be empty.

        Return:
            (tuple): (levels to drop, remaining levels excluding "node").

        Raises:
            ValueError: if "node" is not an index level, a requested level is
                unknown or is "node" itself, or no level is requested while
                the dataframe has more than two index levels.
        """
        df_index_levels = list(multi_index_gf.dataframe.index.names)
        drop_index_levels = list(drop_index_levels) if drop_index_levels else []

        # Validate primary index is 'node'.
        if "node" not in df_index_levels:
            raise ValueError(
                "ht.util.BoxPlot expects the primary index of `multi_index_gf` to be `ht.Graph.Node`."
            )

        if drop_index_levels:
            # Validate drop_index in the dataframe, if provided.
            for _index in drop_index_levels:
                if _index == "node":
                    raise ValueError(
                        "'node' identifies the callsite and cannot be listed in `drop_index_levels`."
                    )
                if _index not in df_index_levels:
                    raise ValueError(
                        f"'drop_index_level: {_index}' is not a valid index of 'multi_index_gf'."
                    )
        elif len(df_index_levels) > 2:
            # Ambiguous: the caller has to say which levels to aggregate over.
            raise ValueError(
                f"multi_index_gf contains {len(df_index_levels)} indexes = {df_index_levels}. ht.util.BoxPlot is limited to processing GraphFrames with 2 indexes. Please specify the `drop_index_levels` by which BoxPlot API will compute the distribution to avoid ambiguity."
            )
        elif len(df_index_levels) == 2:
            # Pick the level that is not "node", whatever its position.
            drop_index_levels = [
                level for level in df_index_levels if level != "node"
            ]

        # Drop the 'node' and `drop_index_levels` from the
        # ht.GraphFrame.DataFrame's indexes.
        output_index_levels = [
            level
            for level in df_index_levels
            if level != "node" and level not in drop_index_levels
        ]

        return drop_index_levels, output_index_levels

    @staticmethod
    def validate_metrics(multi_index_gf: "ht.GraphFrame", metrics: list):
        """
        Validate the requested metrics.

        Arguments:
            multi_index_gf: (ht.GraphFrame) GraphFrame providing the columns.
            metrics: (list) Requested metrics; may be empty.

        Return:
            (list): ``metrics`` if non-empty, otherwise the GraphFrame's
            inclusive metrics followed by its exclusive metrics.

        Raises:
            ValueError: if a requested metric is not a dataframe column.
        """
        if not metrics:
            return multi_index_gf.inc_metrics + multi_index_gf.exc_metrics

        # Validate metrics are columns in the dataframe, if provided.
        for metric in metrics:
            if metric not in multi_index_gf.dataframe.columns:
                raise ValueError(f"{metric} not found in the gf.dataframe.")
        return metrics

    @staticmethod
    def df_groupby(df, groupby, cols):
        """
        Group the dataframe by groupby column.

        Arguments:
            df (pd.DataFrame): flat dataframe; must contain a "nid" column
            groupby: column name(s) to group the rows by
            cols: columns to keep in each group's dataframe

        Return:
            (dict): A dictionary of dataframes (restricted to `cols`) keyed by
            the first "nid" value found in each group. Groups appear in order
            of first occurrence.
        """
        # A one-element list is unwrapped so that iterating the groupby does
        # not trigger pandas' length-1-tuple key deprecation warning.
        if isinstance(groupby, (list, tuple)) and len(groupby) == 1:
            by = groupby[0]
        else:
            by = groupby

        # groupby() always yields DataFrames, including for groups made of a
        # single row, and scans the frame once instead of once per group.
        return {
            group["nid"].unique().tolist()[0]: group[cols]
            for _, group in df.groupby(by, sort=False)
        }

    @staticmethod
    def outliers(data, scale=1.5, side="both"):
        """
        Flag the outliers of the data.

        Arguments:
            data (np.ndarray or pd.Series): 1-D array of values.
            scale (number): Multiple of the IQR beyond the first/third
                quartile at which a value counts as an outlier.
            side (str): Which side to flag: "gt" (above the upper fence),
                "lt" (below the lower fence) or "both".

        Return:
            Boolean mask with one entry per element of `data`; True marks an
            outlier.

        Raises:
            TypeError: if `data` or `scale` has an unsupported type.
            ValueError: if `data` is not 1-D or `side` is unknown.
        """
        if not isinstance(data, (pd.Series, np.ndarray)):
            raise TypeError("`data` must be a pandas Series or a numpy array.")
        if len(data.shape) != 1:
            raise ValueError("`data` must be one-dimensional.")
        if isinstance(scale, bool) or not isinstance(
            scale, (int, float, np.integer, np.floating)
        ):
            raise TypeError("`scale` must be a real number.")
        if side not in ("gt", "lt", "both"):
            raise ValueError("`side` must be one of 'gt', 'lt' or 'both'.")

        q1, q3 = np.percentile(data, [25.0, 75.0])
        iqr_distance = np.multiply(stats.iqr(data), scale)

        if side == "gt":
            return data > q3 + iqr_distance
        if side == "lt":
            return data < q1 - iqr_distance
        return np.logical_or(data > q3 + iqr_distance, data < q1 - iqr_distance)

    @staticmethod
    def compute(multi_index_df, groupby, metrics, cols):
        """
        Compute boxplot quartiles and statistics.

        Arguments:
            multi_index_df: Dataframe to calculate the boxplot information.
            groupby: Columns to aggregate the data.
            metrics: Metric columns to compute the statistics for.
            cols: Columns to retain in the output (the value of the first row
                of each group is kept).

        Return:
            ret (dict): {
                callsite: {
                    "metric1": {
                        "q": (array) quartiles (i.e., [q0, q1, q2, q3, q4]),
                        "d": (array) metric values,
                        "rng": (tuple) (min, max),
                        "uv": (tuple) (mean, variance),
                        "imb": (number) imbalance,
                        "ks": (tuple) (kurtosis, skewness),
                        <col>: first value of every column in `cols`
                    }
                }
            }
        """
        metrics = list(metrics)
        cols = list(cols)
        group_df_dict = BoxPlot.df_groupby(
            df=multi_index_df,
            groupby=groupby,
            cols=cols + metrics,
        )

        boxplot_dict_df = {}
        for callsite, callsite_df in group_df_dict.items():
            per_metric = {}
            for metric in metrics:
                _data = callsite_df[metric].to_numpy()
                q = np.percentile(_data, [0.0, 25.0, 50.0, 75.0, 100.0])

                _min, _mean, _max = _data.min(), _data.mean(), _data.max()
                _var = _data.var()
                # Relative distance of the maximum from the mean; falls back
                # to the maximum itself when the mean is (close to) zero.
                _imb = (_max - _mean) / _mean if not np.isclose(_mean, 0.0) else _max
                _skew = stats.skew(_data)
                _kurt = stats.kurtosis(_data)

                # TODO: Outliers and their corresponding rank member are not
                # reported yet ("ometric" / "ocat"); BoxPlot.outliers() gives
                # the mask once the vis consumes them.
                stats_dict = {
                    "q": q,
                    "d": _data,
                    "rng": (_min, _max),
                    "uv": (_mean, _var),
                    "imb": _imb,
                    "ks": (_kurt, _skew),
                }

                for _column in cols:
                    stats_dict[_column] = callsite_df[_column].iloc[0]

                per_metric[metric] = stats_dict

            boxplot_dict_df[callsite] = per_metric

        return boxplot_dict_df

    def to_json(self):
        """
        Unpack the boxplot data into a JSON-style dictionary.

        Arguments:

        Return:
            result (dict): {
                "callsite1": {
                    "metric": statistics as built by self._unpack_callsite
                },
            }
        """
        return {
            callsite: self._unpack_callsite(callsite)
            for callsite in self.boxplot_df_dict
        }

    def _unpack_metric(self, callsite, metric):
        """Flatten the statistics of one (callsite, metric) pair into a dict."""
        box = self.boxplot_df_dict[callsite][metric]
        unpacked = {
            "q": box["q"].tolist(),
            # "ocat": box["ocat"],  # TODO
            # "ometric": box["ometric"].tolist(),  # TODO
            "min": box["rng"][0],
            "max": box["rng"][1],
            "mean": box["uv"][0],
            "var": box["uv"][1],
            "imb": box["imb"],
            "kurt": box["ks"][0],
            "skew": box["ks"][1],
        }

        for _column in self.output_columns:
            unpacked[_column] = box[_column]

        return unpacked

    def _unpack_callsite(self, callsite):
        """
        Helper function to unpack the data by callsite.

        Arguments:
            callsite: Key of the callsite in self.boxplot_df_dict

        Return:
            ret (dict): {
                "metric": {
                    "q": (list) quartiles (i.e., [q0, q1, q2, q3, q4]),
                    "min": (number) minimum,
                    "max": (number) maximum,
                    "mean": (number) mean,
                    "var": (number) variance,
                    "imb": (number) imbalance,
                    "kurt": (number) kurtosis,
                    "skew": (number) skewness,
                    <col>: value of every column in self.output_columns
                }
            }
            ("ocat" / "ometric" outlier entries are still TODO.)
        """
        return {
            metric: self._unpack_metric(callsite, metric) for metric in self.metrics
        }

    def _to_gf_by_metric(self, gf, metric):
        """
        Wrapper function to unpack the boxplot data into Hatchet.GraphFrame by
        respective metric.

        Argument:
            gf: (hatchet.GraphFrame) GraphFrame supplying the graph and the
                metric lists of the result
            metric: (string) Metric

        Return:
            hatchet.GraphFrame with boxplot information as columns.

        """
        _dtype = {
            "name": str,
            "q": object,
            # "ocat": object,  # TODO
            # "ometric": object,  # TODO
            "min": np.float64,
            "max": np.float64,
            "mean": np.float64,
            "var": np.float64,
            "imb": np.float64,
            "kurt": np.float64,
            "skew": np.float64,
        }
        # Only the requested metric is unpacked for each callsite.
        _dict = {
            callsite: self._unpack_metric(callsite, metric)
            for callsite in self.boxplot_df_dict
        }
        tmp_df = pd.DataFrame.from_dict(data=_dict).T
        tmp_df = tmp_df.astype(_dtype)
        # NOTE(review): the frame has a single index level (the callsite key),
        # so this presumably only works while agg_columns has one entry, i.e.
        # output_indexes is empty -- confirm before relying on extra levels.
        tmp_df.index.names = self.agg_columns
        tmp_df.reset_index(inplace=True)

        tmp_df = tmp_df.drop(columns=self.drop_indexes + ["_hatchet_nid"])
        tmp_df.set_index(["node"] + self.output_indexes, inplace=True)

        # TODO: Would we need to squash the graph. (Check in the to_gf() method.)
        # Call into the gf.groupby_aggregate() (in PR) before returning the gf.
        return ht.GraphFrame(gf.graph, tmp_df, gf.exc_metrics, gf.inc_metrics)

    def to_gf(self):
        """
        Unpack the boxplot data into GraphFrame object.

        Note: In this case, only the hatchet.dataframe will be updated, with
        hatchet.Graph being the same as the input gf.

        Arguments:

        Return:
            (dict) : {
                "metric": hatchet.GraphFrame, ...
            }
        """
        return {
            metric: self._to_gf_by_metric(self.multi_index_gf, metric)
            for metric in self.metrics
        }