diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb new file mode 100644 index 00000000..4eb250c4 --- /dev/null +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -0,0 +1,936 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Performance Variability Boxplots\n", + "\n", + "Performance variability boxplots provide an insight into the runtime distribution and its variability across callsites. Boxplots are calculated to represent the range of the distribution, and outliers (dots) correspond to values that lie beyond 1.5*IQR. Additionally, several statistical measures like mean, variance, kurtosis, skewness are also provided." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "from IPython.display import HTML, display\n", + "\n", + "# Hatchet imports\n", + "import hatchet as ht\n", + "from hatchet.external.scripts import BoxPlot" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will construct a **hatchet.GraphFrame** using a sample dataset in our repository, **caliper-lulesh-json**. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir = os.path.realpath(\"../../../hatchet/tests/data\")\n", + "data_path = os.path.join(data_dir, \"caliper-lulesh-json/lulesh-annotation-profile.json\")\n", + "gf = ht.GraphFrame.from_caliper_json(data_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, using the **hatchet.GraphFrame**, we can calculate the data required for performance variability boxplot using an exposed hatchet API, **Boxplot**.\n", + "\n", + "The interface expects the following attributes:\n", + "1. `tgt_gf` - Target hatchet.GraphFrame \n", + "2. `bkg_gf` - Background hatchet.GraphFrame (optional)\n", + "3. 
`callsites` - List of callsite names for which we want to compute/visualize the boxplots.\n", + "4. `metrics` - Runtime metrics for which we need to calculate the boxplots.\n", + "5. `iqr_scale` - Interquartile range scale (by default = 1.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "callsites = gf.dataframe.name.unique().tolist()\n", + "bp = BoxPlot(cat_column='rank', tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **Boxplot** API calculates the results and stores them as GraphFrames in a dictionary (i.e., `tgt` and `bkg`). " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'time': }" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.tgt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the **roundtrip** interface, we can then visualize the computed boxplot information. Below, we load the roundtrip interface that allows users to visualize plots on jupyter notebook cells directly. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The roundtrip extension is already loaded. 
To reload it, use:\n", + " %reload_ext roundtrip\n" + ] + } + ], + "source": [ + "# This is the relative path from the notebook to Roundtrip files in hatchet/external/roundtrip/\n", + "roundtrip_path = '../../../hatchet/external/roundtrip/'\n", + "hatchet_path = \".\"\n", + "\n", + "# Add the path so that the notebook can find the Roundtrip extension\n", + "module_path = os.path.abspath(os.path.join(roundtrip_path)) \n", + "if module_path not in sys.path:\n", + " sys.path.append(module_path)\n", + " sys.path.append(hatchet_path)\n", + "\n", + " \n", + "# Uncomment this line to widen the cells to handle large trees \n", + "#display(HTML(\"\"))\n", + "\n", + "# Load the Roundtrip extension. This only needs to be loaded once.\n", + "%load_ext roundtrip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since **roundtrip** expects the data in JSON format, the **Boxplot** API exposes a method, `to_json()`, which will dump the boxplot's graphframes (i.e., `tgt` and `bkg`) in JSON." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "bp_json = bp.to_json()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'tgt': {'time': {'q': [105528.0, 113072.25, 116494.0, 124430.75, 137098.0], 'ocat': [], 'ometric': [], 'min': 105528.0, 'max': 137098.0, 'mean': 119373.5, 'var': 104497970.25, 'imb': 0.14847935262013764, 'kurt': -0.9421848873183336, 'skew': 0.5436725364039101}}}\n" + ] + } + ], + "source": [ + "print(bp_json['main'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can trigger the visualization using **roundtrip** magic command, `%loadVisualization`. 
`%loadVisualization` expects the `roundtrip_path` (path in which roundtrip resides), `\"boxplot\"` (identifier to the visualization type) and variable containing the data for the boxplots (here it is bp_json).\n", + "\n", + "Interactions on the boxplot visualization:\n", + "1. Users can select the metric of interest to visualize the corresponding runtime information.\n", + "2. Users can sort the callsites by their statistical attributes (i.e., mean, min, max, variance, imbalance, kurtosis and skewness).\n", + "3. Users can select the sorting order (i.e., ascending or descending).\n", + "4. Users can select the number of callsites that would be visualized." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%loadVisualization roundtrip_path \"boxplot\" bp_json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the exploration of the variability is done. Users can get the corresponding data in their visualization using the `%fetchData` magic command. Similar to the `%loadVisualization`, we will have to specify `\"boxplot\"` to identify the corresponding visualization type. The results will be stored in the following variable (here it is `result_csv` ) in the `.csv` format." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " var holder = variance_df;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('result_csv = '+ holder);\n", + " IPython.notebook.kernel.execute('result_csv = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var holder = variance_df;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('result_csv = '+ holder);\n", + " IPython.notebook.kernel.execute('result_csv = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%fetchData \"boxplot\" result_csv" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"name,min,max,mean,var,imb,kurt,skew;CalcFBHourglassForceForElems,1088315,1276744,1197360.375,3561043884.734375,0.066298857601664,-0.8618185329919692,-0.336770351062538;CalcKinematicsForElems,493338,907675,740734,20585329027.5,0.22537240088884808,-1.323030118573988,-0.3042530153918946;IntegrateStressForElems,448597,987804,725254.375,29868514054.234375,0.3620103980758475,-1.2658383358291696,-0.1038366357478744;CalcHourglassControlForElems,494580,599077,574309,982583388.75,0.04312660954294639,2.322254192176139,-1.930747431397297;CalcMonotonicQGradientsForElems,326522,448753,393558.125,1927822359.609375,0.140245802319543,-1.5265491924225043,-0.08914394549811265\n" + ] + } + ], + "source": [ + "print(result_csv)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `.csv` formatted output can be converted to a dataframe as shown below." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "columns = result_csv.split(';')[0].split(',')\n", + "data = [x.split(',') for x in result_csv.split(';')[1:]]\n", + "df = pd.DataFrame(data, columns=columns).set_index('name')" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
minmaxmeanvarimbkurtskew
name
CalcFBHourglassForceForElems108831512767441197360.3753561043884.7343750.066298857601664-0.8618185329919692-0.336770351062538
CalcKinematicsForElems49333890767574073420585329027.50.22537240088884808-1.323030118573988-0.3042530153918946
IntegrateStressForElems448597987804725254.37529868514054.2343750.3620103980758475-1.2658383358291696-0.1038366357478744
CalcHourglassControlForElems494580599077574309982583388.750.043126609542946392.322254192176139-1.930747431397297
CalcMonotonicQGradientsForElems326522448753393558.1251927822359.6093750.140245802319543-1.5265491924225043-0.08914394549811265
\n", + "
" + ], + "text/plain": [ + " min max mean \\\n", + "name \n", + "CalcFBHourglassForceForElems 1088315 1276744 1197360.375 \n", + "CalcKinematicsForElems 493338 907675 740734 \n", + "IntegrateStressForElems 448597 987804 725254.375 \n", + "CalcHourglassControlForElems 494580 599077 574309 \n", + "CalcMonotonicQGradientsForElems 326522 448753 393558.125 \n", + "\n", + " var imb \\\n", + "name \n", + "CalcFBHourglassForceForElems 3561043884.734375 0.066298857601664 \n", + "CalcKinematicsForElems 20585329027.5 0.22537240088884808 \n", + "IntegrateStressForElems 29868514054.234375 0.3620103980758475 \n", + "CalcHourglassControlForElems 982583388.75 0.04312660954294639 \n", + "CalcMonotonicQGradientsForElems 1927822359.609375 0.140245802319543 \n", + "\n", + " kurt skew \n", + "name \n", + "CalcFBHourglassForceForElems -0.8618185329919692 -0.336770351062538 \n", + "CalcKinematicsForElems -1.323030118573988 -0.3042530153918946 \n", + "IntegrateStressForElems -1.2658383358291696 -0.1038366357478744 \n", + "CalcHourglassControlForElems 2.322254192176139 -1.930747431397297 \n", + "CalcMonotonicQGradientsForElems -1.5265491924225043 -0.08914394549811265 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python-3.9", + "language": "python", + "name": "python-3.9" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/examples/tutorial/roundtrip-demo.ipynb b/docs/examples/tutorial/roundtrip-demo.ipynb index e9c05828..a7a7ea6a 100644 --- a/docs/examples/tutorial/roundtrip-demo.ipynb +++ b/docs/examples/tutorial/roundtrip-demo.ipynb @@ -15,9 +15,22 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import os, sys\n", "from IPython.display import HTML, display\n", @@ -48,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -175,11 +188,897 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "%loadVisualization roundtrip_path literal_tree" + "%loadVisualization roundtrip_path \"literal_tree\" literal_tree" ] }, { @@ -194,19 +1093,62 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('myQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('myQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('myQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('myQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Execute this cell first to populate myQuery with your selection\n", - "%fetchData myQuery" + "%fetchData \"literal_tree\" myQuery" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['*']\n" + ] + } + ], "source": [ "# Now myQuery is loaded and can be used\n", "# If no nodes are clicked/selected, the default behavior is to return a query that shows all nodes.\n", @@ -218,9 +1160,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of input nodes: 12\n", + "Number of output nodes: 12\n", + " __ __ __ __ \n", + " / /_ ____ _/ /______/ /_ ___ / /_\n", + " / __ \\/ __ `/ __/ ___/ __ \\/ _ \\/ __/\n", + " / 
/ / / /_/ / /_/ /__/ / / / __/ /_ \n", + "/_/ /_/\\__,_/\\__/\\___/_/ /_/\\___/\\__/ v1.3.1a0\n", + "\n", + "\u001b[38;5;22m0.000\u001b[0m foo\u001b[0m\n", + "├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + "│ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "└─ \u001b[38;5;22m0.000\u001b[0m qux\u001b[0m\n", + " └─ \u001b[38;5;46m5.000\u001b[0m quux\u001b[0m\n", + " └─ \u001b[38;5;220m10.000\u001b[0m corge\u001b[0m\n", + " ├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + " │ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + " │ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + " ├─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + " └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "\n", + "\u001b[4mLegend\u001b[0m (Metric: time Min: 0.00 Max: 15.00)\n", + "\u001b[38;5;196m█ \u001b[0m13.50 - 15.00\n", + "\u001b[38;5;208m█ \u001b[0m10.50 - 13.50\n", + "\u001b[38;5;220m█ \u001b[0m7.50 - 10.50\n", + "\u001b[38;5;46m█ \u001b[0m4.50 - 7.50\n", + "\u001b[38;5;34m█ \u001b[0m1.50 - 4.50\n", + "\u001b[38;5;22m█ \u001b[0m0.00 - 1.50\n", + "\n", + "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", + "\n" + ] + } + ], "source": [ "# Load the string-literal tree defined above into a GraphFrame\n", "gf = ht.GraphFrame.from_literal(literal_tree)\n", @@ -250,7 +1230,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -440,30 +1420,957 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "%loadVisualization roundtrip_path multiple_roots" + "%loadVisualization roundtrip_path \"literal_tree\" multiple_roots" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('anotherQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('anotherQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('anotherQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('anotherQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Execute this cell first to load anotherQuery\n", - "%fetchData anotherQuery" + "%fetchData \"literal_tree\" anotherQuery" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['*']\n" + ] + } + ], "source": [ "# Now anotherQuery is loaded and can be used (after %fetchData executes)\n", "# If no nodes are clicked/selected, the default behavior is to return a query\n", @@ -476,9 +2383,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of input nodes: 25\n", + "Number of output nodes: 25\n", + " __ __ __ __ \n", + " / /_ ____ _/ /______/ /_ ___ / /_\n", + " / __ \\/ __ `/ __/ ___/ __ \\/ _ \\/ __/\n", + " / 
/ / / /_/ / /_/ /__/ / / / __/ /_ \n", + "/_/ /_/\\__,_/\\__/\\___/_/ /_/\\___/\\__/ v1.3.1a0\n", + "\n", + "\u001b[38;5;22m0.000\u001b[0m foo\u001b[0m\n", + "├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + "│ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "├─ \u001b[38;5;22m0.000\u001b[0m qux\u001b[0m\n", + "│ └─ \u001b[38;5;46m5.000\u001b[0m quux\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m corge\u001b[0m\n", + "│ ├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + "│ │ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + "│ │ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "│ ├─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "└─ \u001b[38;5;22m0.000\u001b[0m waldo\u001b[0m\n", + " ├─ \u001b[38;5;46m5.000\u001b[0m fred\u001b[0m\n", + " │ ├─ \u001b[38;5;46m5.000\u001b[0m plugh\u001b[0m\n", + " │ └─ \u001b[38;5;46m5.000\u001b[0m xyzzy\u001b[0m\n", + " │ └─ \u001b[38;5;46m5.000\u001b[0m thud\u001b[0m\n", + " │ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + " │ └─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + " └─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + "\u001b[38;5;22m0.000\u001b[0m kap\u001b[0m\n", + "├─ \u001b[38;5;196m15.000\u001b[0m kat\u001b[0m\n", + "└─ \u001b[38;5;46m5.000\u001b[0m kow\u001b[0m\n", + " ├─ \u001b[38;5;46m5.000\u001b[0m kraze\u001b[0m\n", + " └─ \u001b[38;5;46m5.000\u001b[0m krazy\u001b[0m\n", + "\n", + "\u001b[4mLegend\u001b[0m (Metric: time Min: 0.00 Max: 15.00)\n", + "\u001b[38;5;196m█ \u001b[0m13.50 - 15.00\n", + "\u001b[38;5;208m█ \u001b[0m10.50 - 13.50\n", + "\u001b[38;5;220m█ \u001b[0m7.50 - 10.50\n", + "\u001b[38;5;46m█ \u001b[0m4.50 - 7.50\n", + "\u001b[38;5;34m█ \u001b[0m1.50 - 4.50\n", + "\u001b[38;5;22m█ \u001b[0m0.00 - 1.50\n", + "\n", + "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right 
// Boxplot flavours and sort directions accepted by the helpers below.
const BOXPLOT_TYPES = ["tgt", "bkg"];
const SORTORDER_TYPES = ["asc", "desc"];

// --------------------------------------------------------------------------------
// Utility functions.
// --------------------------------------------------------------------------------
// TODO: Move this to a common utils folder.
/**
 * Replace every single quote in each input string with a double quote.
 *
 * Note that the quotes are swapped, not removed. Presumably this lets
 * Python-style single-quoted payloads be parsed with JSON.parse later on;
 * confirm against the Roundtrip caller.
 *
 * @param {Array<String>} strings - Raw argument strings.
 * @returns {Array<String>} New array with `'` replaced by `"` in every entry.
 */
function cleanInputs(strings) {
    return strings.map((str) => str.replace(/'/g, '"'));
}

/**
 * Order the callsites by one statistic of one metric and return, per
 * callsite, the statistics object of that metric.
 *
 * Callsites that have no data for `boxplotType` (or no value for the
 * requested metric/attribute) cannot be compared. They are kept in the
 * result, with whatever value they have (possibly `undefined`), and are
 * placed after all comparable callsites in their original relative order.
 * Previously a single such callsite silently disabled sorting for all
 * callsites, and a missing metric threw inside the comparator.
 *
 * @param {Object} callsites - Map: callsite name -> { tgt: {...}, bkg: {...} }.
 * @param {String} metric - Metric passed by user (e.g., time or time (inc)).
 * @param {String} attribute - Attribute to sort by.
 * @param {String} sortOrder - Sorting order - for options, refer SORTORDER_TYPES.
 * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES.
 * @returns {Object} Map: callsite name -> statistics of `metric`, with keys
 *      inserted in sorted order.
 */
function sortByAttribute(callsites, metric, attribute, sortOrder, boxplotType) {
    if (!SORTORDER_TYPES.includes(sortOrder)) {
        console.error("Invalid sortOrder. Use either 'asc' or 'desc'");
    }

    if (!BOXPLOT_TYPES.includes(boxplotType)) {
        console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'");
    }

    // +1 keeps the natural (ascending) numeric order, -1 reverses it.
    // An invalid sortOrder yields `undefined`, in which case the input
    // order is left untouched (the error has already been reported above).
    const direction = { "asc": 1, "desc": -1 }[sortOrder];

    const entries = Object.keys(callsites).map((name) => {
        const typeStats = callsites[name][boxplotType];
        const metricStats = typeStats == null ? undefined : typeStats[metric];
        const value = metricStats == null ? undefined : metricStats[attribute];
        return { name, metricStats, value };
    });

    if (direction !== undefined) {
        // Array.prototype.sort is stable, so entries that compare equal
        // (including all "missing" entries) keep their original order.
        entries.sort((first, second) => {
            const firstMissing = first.value == null;
            const secondMissing = second.value == null;
            if (firstMissing || secondMissing) {
                // Missing values always go last, whatever the direction.
                return Number(firstMissing) - Number(secondMissing);
            }
            return direction * (first.value - second.value);
        });
    }

    const sorted = {};
    for (const { name, metricStats } of entries) {
        sorted[name] = metricStats;
    }
    return sorted;
}
// --------------------------------------------------------------------------------
// Visualization functions.
// --------------------------------------------------------------------------------
/**
 * Format the statistics for display. Every value goes through
 * d3_utils.formatRuntime (a mantissa and exponent format); for more
 * info, refer d3_utils.formatRuntime.
 *
 * @param {Object} d Statistics object
 * @returns {Object} Formatted statistics object with the keys
 *      min, max, mean, var, imb, kurt, skew (in that order).
 */
function _format(d) {
    const formatted = {};
    for (const stat of ["min", "max", "mean", "var", "imb", "kurt", "skew"]) {
        formatted[stat] = d3_utils.formatRuntime(d[stat]);
    }
    return formatted;
}

/**
 * Convert the stats dictionary to a csv-like string.
 *
 * Records are separated by ";" and fields by ",". The first record is the
 * header "name,min,max,mean,var,imb,kurt,skew".
 *
 * A statistic that is absent from a callsite is written as an empty field,
 * so every row keeps the same number of columns. Previously the field was
 * dropped, which shifted the later values under the wrong headers.
 * Callsites that have no statistics for `boxplotType` are skipped instead
 * of raising a TypeError.
 *
 * @param {Object} dict Statistics Object: callsite name -> { tgt: stats, bkg: stats }
 * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES.
 * @return {String} result dictionary reformatted as a string (csv format)
 */
function dict_to_csv(dict, boxplotType) {
    const statColumns = ["min", "max", "mean", "var", "imb", "kurt", "skew"];
    const records = ["name," + statColumns.join(",")];

    for (const callsite of Object.keys(dict)) {
        const stats = dict[callsite][boxplotType];
        if (stats == null) {
            console.debug("No '" + boxplotType + "' statistics for callsite: ", callsite);
            continue;
        }

        // String() rather than relying on join(): join would turn a present
        // null/undefined value into "", hiding it from the reader.
        const cells = statColumns.map((stat) => (
            Object.prototype.hasOwnProperty.call(stats, stat) ? String(stats[stat]) : ""
        ));
        records.push([callsite, ...cells].join(","));
    }

    const result = records.join(";");

    // Assertions to check if the right number of columns are being
    // passed. This can still trigger when a callsite name itself contains
    // a "," or ";" character.
    for (const record of result.split(";")) {
        if (record.split(",").length !== statColumns.length + 1) {
            console.error("Mismatch in the number of stats metrics and data");
            console.debug("Columns: ", result.split(";")[0]);
            console.debug("Data: ", record);
        }
    }

    return result;
}

/**
 * Renders menu view for selecting metric, attribute, sortOrder and
 * callsites. Every dropdown writes the chosen value to the module `state`
 * and then calls `reset()`.
 *
 * @param {Object} data Callsite data; the metric names are read from the
 *      "tgt" entry of the first callsite.
 */
function menu(data) {
    // Selection dropdown for metrics.
    const metrics = Object.keys(data[callsites[0]]["tgt"]);
    if (state.selectedMetric == null) state.selectedMetric = metrics[0];
    const metricOnChange = (event) => {
        state.selectedMetric = event.target.value;
        reset();
    };
    d3_utils.selectionDropDown(element, metrics, "metricSelect", "Metric: ", metricOnChange);

    // Selection dropdown for attributes.
    if (state.selectedAttribute == null) state.selectedAttribute = globals.attributes[0];
    const attributeOnChange = (event) => {
        state.selectedAttribute = event.target.value;
        reset();
    };
    d3_utils.selectionDropDown(element, globals.attributes, "attributeSelect", "Sort by: ", attributeOnChange);

    // Selection dropdown for sortOrder.
    const sortOrderOnChange = (event) => {
        state.selectedSortOrder = event.target.value;
        reset();
    };
    d3_utils.selectionDropDown(element, globals.sortOrders, "sortingSelect", "Sort order: ", sortOrderOnChange);

    // Selection dropdown for topNCallsites.
    const topNCallsitesOnChange = (event) => {
        // Form values arrive as strings; keep the state in the same shape
        // as globals.topNCallsites (numbers, or the literal "all").
        const { value } = event.target;
        state.selectedTopNCallsites = value === "all" ? value : Number(value);
        reset();
    };
    d3_utils.selectionDropDown(element, globals.topNCallsites, "topNCallsitesSelect", "Top N callsites: ", topNCallsitesOnChange);
}

/**
 * Renders the statistics as rows of text next to the boxplot.
 *
 * @param {svg.g} g HTML element.
 * @param {Object} d Statistics of one boxplot type (passed to _format).
 * @param {String} boxplotType boxplot type - for options, refer BOXPLOT_TYPES.
 * @param {Number} boxWidth Width of the boxplot view.
 */
function visualizeStats(g, d, boxplotType, boxWidth) {
    const stats = _format(d);
    const TYPE_TEXTS = {
        "tgt": "Target",
        "bkg": "Background"
    };

    // Target stats sit directly right of the box, background stats further right.
    const xOffset = boxplotType === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth;
    const textColor = boxplotType === "tgt" ? "#4DAF4A" : "#202020";

    const statsG = g.append("g")
        .attr("class", "stats");

    // Text for statistics title.
    d3_utils.drawText(statsG, TYPE_TEXTS[boxplotType], xOffset, 15, 0, textColor, "underline");

    // Text for statistics, one row per statistic below the title (row 0).
    let statIdx = 1;
    for (const [stat, val] of Object.entries(stats)) {
        d3_utils.drawText(statsG, `${stat}: ${val}`, xOffset, 15, statIdx, textColor);
        statIdx += 1;
    }
}

/**
 * Renders a boxplot for one callsite.
 *
 * @param {svg.g} g HTML element.
 * @param {Object} d Statistics; reads d.q = [q0, q1, q2, q3, q4] and d.ometric (outlier values).
 * @param {String} type boxplot type - for options, refer BOXPLOT_TYPES.
 * @param {d3.scale} xScale Scale for layouting the boxplot.
 * @param {Boolean} drawCenterLine draws center line, if true.
 */
function visualizeBoxplot(g, d, type, xScale, drawCenterLine) {
    const fillColor = {
        "tgt": "#4DAF4A",
        "bkg": "#D9D9D9"
    };
    const strokeWidth = 1;
    const boxYOffset = 30;
    const strokeColor = "#202020";
    const boxHeight = 80;

    const boxG = g.append("g").attr("class", "box");

    // Centerline spanning the whole scale domain at mid-height of the box.
    if (drawCenterLine) {
        const [min, max] = xScale.domain();
        const centerY = boxYOffset + boxHeight / 2;
        d3_utils.drawLine(boxG, xScale(min), centerY, xScale(max), centerY, strokeColor);
    }

    // Tooltip (shown on both click and hover).
    const tooltipWidth = 100;
    const tooltipHeight = 30;
    const tooltipText = `q1: ${d3_utils.formatRuntime(d.q[1])}, q3: ${d3_utils.formatRuntime(d.q[3])}`;
    const showToolTip = (event) => d3_utils.drawToolTip(boxG, event, tooltipText, tooltipWidth, tooltipHeight);
    const hideToolTip = (event) => d3_utils.clearToolTip(boxG, event);

    // Box from q1 to q3.
    d3_utils.drawRect(boxG, {
        "class": "rect",
        "x": xScale(d.q[1]),
        "y": boxYOffset,
        "height": boxHeight,
        "fill": fillColor[type],
        "width": xScale(d.q[3]) - xScale(d.q[1]),
        "stroke": strokeColor,
        "stroke-width": strokeWidth
    }, showToolTip, showToolTip, hideToolTip);

    // Markers at q0 and q4.
    const markerStrokeWidth = 3;
    for (const quartile of [d.q[0], d.q[4]]) {
        d3_utils.drawLine(boxG, xScale(quartile), boxYOffset, xScale(quartile), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth);
    }

    // Outliers, drawn as circles above the box.
    // TODO: the rank of each outlier is not attached to the circles yet.
    const outlierRadius = 4;
    const outliers = d.ometric.map((value) => ({
        x: xScale(value),
        value,
        y: 10
    }));
    d3_utils.drawCircle(boxG, outliers, outlierRadius, fillColor[type]);
}
+ const tgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, selectedSortOrder, "tgt"); + const bkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, selectedSortOrder, "bkg"); + + const callsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; + + let topNCallsites = callsites; + if(selectedTopNCallsites !== "all" && selectedTopNCallsites < callsites.length) { + topNCallsites = callsites.slice(0, selectedTopNCallsites); + } + + // Assign an index to the callsites. + const idxToNameMap = Object.assign({}, topNCallsites.map((callsite) => (callsite))); + const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}); + + // Setup VIS area. + const margin = { top: 30, right: 0, bottom: 0, left: 0 }, + containerHeight = globals.boxContainerHeight * Object.keys(topNCallsites).length + 2 * margin.top, + width = element.clientWidth - margin.right - margin.left, + height = containerHeight - margin.top - margin.bottom; + const svgArea = d3_utils.prepareSvgArea(width, height, margin, globals.id); + const svg = d3_utils.prepareSvg(element, svgArea); + + d3_utils.drawText(svg, "Total number of callsites: " + callsites.length, 0, 0, 0, "#000", "underline"); + + const boxWidth = 0.6 * width; + for (let callsite of topNCallsites) { + let tgt = null; + if (callsite in tgtCallsites) tgt = tgtCallsites[callsite]; + + let bkg = null; + if (callsite in bkgCallsites) bkg = bkgCallsites[callsite]; + + // Set the min and max for xScale. 
+ let min = 0, max = 0; + if (bkg === undefined) { + min = tgt.min; + max = tgt.max; + } else { + min = Math.min(tgt.min, bkg.min); + max = Math.max(tgt.max, bkg.max); + } + const xScale = d3.scaleLinear() + .domain([min, max]) + .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); + + // Set up a g container + const idx = nameToIdxMap[callsite]; + const gId = "box-" + idx; + const gYOffset = 200; + const g = svg.append("g") + .attr("id", gId) + .attr("width", boxWidth) + .attr("transform", "translate(0, " + ((gYOffset * idx) + 30) + ")"); + + const axisOffset = gYOffset * 0.6; + d3_utils.drawXAxis(g, xScale, globals.tickCount, d3_utils.formatRuntime, 0, axisOffset, "black"); + + // Text for callsite name. + const callsiteIndex = parseInt(idx) + 1 + d3_utils.drawText(g, `(${callsiteIndex}) Callsite : ` + callsite, 0, 0, 0, "#000"); + + visualizeStats(g, tgt, "tgt", boxWidth); + if (bkg !== undefined) { + visualizeStats(g, bkg, "bkg", boxWidth); + } + + visualizeBoxplot(g, tgt, "tgt", xScale, true); + if (bkg !== undefined) { + visualizeBoxplot(g, bkg, "bkg", xScale, false); + } + + variance_dict[callsite] = { tgt, bkg }; + } + + return variance_dict + } + + /** + * Clears the view and resets the view. 
/**
 * d3 helper module (AMD): container sizing, SVG set-up, scale factories,
 * a dropdown component, number formatting and primitive SVG drawing helpers.
 *
 * The helpers are collected on a named object so that the ones built on top of
 * their siblings (`calcCellSize`, `initSvgInfo`) can actually reach them; as
 * bare identifiers those siblings are not in scope and the call throws a
 * ReferenceError.
 */
define(function (require) {
    const d3 = require("d3");

    const utils = {
        // Container / cell sizing. The computed style is a string such as "300px";
        // the trailing two characters are dropped and the rest coerced to a number.
        calcContainerWidth: name => +d3.select(name).style('width').slice(0, -2),
        calcContainerHeight: name => +d3.select(name).style('height').slice(0, -2),
        calcCellWidth: (width, colNames) => width / colNames.length,
        calcCellHeight: (height, rowNames) => height / rowNames.length,
        // Returns [cellWidth, cellHeight], each capped by its maximum.
        calcCellSize: (width, height, colNames, rowNames, widthMax, heightMax) => [
            Math.min(utils.calcCellWidth(width, colNames), widthMax),
            Math.min(utils.calcCellHeight(height, rowNames), heightMax)
        ],

        // SVG init.
        // Describes the drawable area: the window size minus the margins.
        prepareSvgArea: (windowWidth, windowHeight, margin, id) => {
            return {
                width: windowWidth - margin.left - margin.right,
                height: windowHeight - margin.top - margin.bottom,
                margin: margin,
                id: id
            };
        },
        // Appends an <svg> sized to area + margins and returns the inner <g>
        // translated by the left/top margin.
        prepareSvg: (id, svgArea) => {
            const svg = d3.select(id)
                .append('svg')
                .attr("id", svgArea.id)
                .attr('width', svgArea.width + svgArea.margin.left + svgArea.margin.right)
                .attr('height', svgArea.height + svgArea.margin.top + svgArea.margin.bottom)
                .append('g')
                .attr('transform',
                    'translate(' + svgArea.margin.left + ',' + svgArea.margin.top + ')');

            return svg;
        },
        clearSvg: (id) => {
            d3.selectAll("#" + id).remove();
        },
        // Fills targetView.svgData with the area, the prepared svg group and the
        // dom id of the view; margins default to zero on every side.
        initSvgInfo: (targetView, margin) => {
            const sd = targetView.svgData;
            const domId = targetView.domId;

            // No id is passed for the <svg> element here, so svgArea.id stays undefined.
            sd.svgArea = utils.prepareSvgArea(
                utils.calcContainerWidth(`#${domId}`),
                utils.calcContainerHeight(`#${domId}`), margin || {
                    top: 0,
                    right: 0,
                    bottom: 0,
                    left: 0
                });
            sd.svg = utils.prepareSvg(`#${domId}`, sd.svgArea);
            sd.domId = targetView.domId;
        },

        // Axes, Scaling
        // Each factory falls back to the extent of `data` when no domain is given.
        genX: (data, svgArea, domain = null, scaler = d3.scaleLinear()) => {
            if (domain === null) {
                domain = d3.extent(data);
            }
            return scaler.domain(domain).range([0, svgArea.width]);
        },
        genInvX: (data, svgArea, domain = null, scaler = d3.scaleLinear()) => {
            if (domain === null) {
                domain = d3.extent(data);
            }
            return scaler.domain([0, svgArea.width]).range(domain);
        },
        // goUp = true maps the domain start to the bottom of the area.
        genY: (data, svgArea, domain = null, scaler = d3.scaleLinear(), goUp = true) => {
            if (domain === null) {
                domain = d3.extent(data);
            }
            return goUp ?
                scaler.domain(domain).range([svgArea.height, 0]) :
                scaler.domain(domain).range([0, svgArea.height]);
        },
        genInvY: (data, svgArea, domain = null, scaler = d3.scaleLinear()) => {
            if (domain === null) {
                domain = d3.extent(data);
            }
            return scaler.domain([svgArea.height, 0]).range(domain);
        },

        // UI Components
        // Appends a <label> plus a <select> with one <option> per entry of `data`.
        selectionDropDown: (element, data, id, title, onChange) => {
            d3.select(element).append('label').attr('for', id).text(title);
            const dropdown = d3.select(element).append("select")
                .attr("id", id)
                .style("margin", "10px 10px 10px 0px")
                .on('change', onChange);

            const options = dropdown.selectAll('option')
                .data(data)
                .enter()
                .append('option');

            options.text(d => d)
                .attr('value', d => d);
        },


        // Formatting numbers
        // Zero is returned untouched; everything else goes through d3.format(".3").
        // The loose comparison is kept so values such as "0" short-circuit as before.
        formatRuntime: (val) => {
            if (val == 0) {
                return val;
            }
            const format = d3.format(".3");
            return format(val);
        },

        // SVG elements
        drawRect: (element, attrDict, click = () => { }, mouseover = () => { }, mouseout = () => { }) => {
            return element.append("rect")
                .attr("x", attrDict["x"])
                .attr("y", attrDict["y"])
                .attr("height", attrDict["height"])
                .attr("width", attrDict["width"])
                .attr("fill", attrDict["fill"])
                .attr("stroke", attrDict["stroke"])
                .attr("stroke-width", attrDict["stroke-width"])
                .on("click", click)
                .on("mouseover", mouseover)
                .on("mouseout", mouseout);
        },
        // The text is placed at y = yOffset * yOffsetIdx, i.e. yOffset acts as a row height.
        drawText: (element, text, xOffset, yOffset, yOffsetIdx, textColor, textDecoration) => {
            return element
                .append('text')
                .attr("x", xOffset)
                .attr("y", yOffset * yOffsetIdx)
                .attr("fill", textColor)
                .attr("text-decoration", textDecoration)
                .text(text);
        },
        drawLine: (element, x1, y1, x2, y2, strokeColor, strokeWidth) => {
            return element
                .append("line")
                .attr("class", "line")
                .attr("x1", x1)
                .attr("y1", y1)
                .attr("x2", x2)
                .attr("y2", y2)
                .attr("stroke", strokeColor)
                .style("stroke-width", strokeWidth);
        },
        // One circle per datum; each datum supplies its own x and y.
        drawCircle: (element, data, radius, fillColor, click = () => { }, mouseover = () => { }, mouseout = () => { }) => {
            return element
                .selectAll(".circle")
                .data(data)
                .join("circle")
                .attr("r", radius)
                .attr("cx", (d) => d.x)
                .attr("cy", (d) => d.y)
                .attr("class", "circle")
                .style("fill", fillColor)
                .on("click", (d) => click(d))
                .on("mouseover", (d) => mouseover(d))
                .on("mouseout", (d) => mouseout(d));
        },
        drawXAxis: (element, xScale, numOfTicks, tickFormatFn, xOffset, yOffset, strokeColor) => {
            const axis = d3.axisBottom(xScale)
                .ticks(numOfTicks)
                .tickFormat(tickFormatFn);

            const line = element.append("g")
                .attr("class", "xAxis")
                .attr("transform", `translate(${xOffset}, ${yOffset})`)
                .call(axis);

            line.selectAll("path")
                .style("fill", "none")
                .style("stroke", strokeColor)
                .style("stroke-width", "1px");

            line.selectAll("line")
                .style("fill", "none")
                .style("stroke", strokeColor)
                .style("stroke-width", "1px");

            line.selectAll("text")
                .style("font-size", "12px")
                .style("font-family", "sans-serif")
                .style("font-weight", "lighter");

            return line;
        },
        // Draws a boxed text at the pointer position, relative to `element`.
        drawToolTip: (element, event, text, width, height) => {
            const [mousePosX, mousePosY] = d3.pointer(event, element.node());
            const toolTipG = element
                .append("g")
                .attr("class", "tooltip")
                .attr("transform", `translate(${mousePosX}, ${mousePosY})`);

            toolTipG.append("rect")
                .attr("class", "tooltip-area")
                .attr("width", width)
                .attr("height", height)
                .attr("fill", "#fff")
                .attr("stroke", "#000");

            toolTipG.append("text")
                .attr("class", "tooltip-content")
                .style("font-family", "sans-serif")
                .style("font-size", "12px")
                .attr("fill", "#000")
                .text(text);
        },
        clearToolTip: (element) => {
            element.selectAll(".tooltip").remove();
        }
    };

    return utils;
});
def cleanLineArgument(self, arg):
    """Resolve one token of a line-magic argument string.

    A token containing a double quote is returned with every double quote
    removed; otherwise a token containing a single quote is returned with
    every single quote removed. A token with no quotes at all is treated as
    the name of a variable and looked up in the notebook namespace
    (``self.shell.user_ns``), which raises ``KeyError`` for an unknown name.
    """
    # Double quotes take precedence: only the first quote style found is stripped.
    for quote in ('"', "'"):
        if quote in arg:
            return arg.replace(quote, "")

    # Path is a variable from the nb namespace
    return self.shell.user_ns[arg]
+ name = "roundtripTreeVis" + str(self.id_number) + # Read the appropriate JS file. + fileAndPath = os.path.join(path, VIS_TO_FILE[visType]) javascriptFile = open(fileAndPath).read() # Source input files @@ -62,15 +79,26 @@ def loadVisualization(self, line): displayObj = display(HTML(argList), display_id=True) - if isinstance(self.shell.user_ns[args[1]], list): - args[1] = self.shell.user_ns[args[1]] - elif isinstance(self.shell.user_ns[args[1]], object): - args[1] = self.shell.user_ns[args[1]].to_literal() + displayObj.update(Javascript('argList.push("' + str(path) + '")')) + displayObj.update(Javascript('argList.push("' + str(visType) + '")')) + displayObj.update(Javascript('argList.push("' + str(data) + '")')) + + VIS_TO_VALIDATION[visType](data) + + # Get curent cell id. + self.codeMap[name] = javascriptFile + + preRun = """ + // Grab current context + elementTop = element.get(0);""" + displayObj.update(Javascript(preRun)) - displayObj.update(Javascript('argList.push("{}")'.format(str(args[1])))) + self.runVis(name, javascriptFile, visType) + self.id_number += 1 + def _validate_literal_tree(self, data): # Check that users provided a tree literal - if not isinstance(args[1], list): + if not isinstance(data, list): print( """The argument is not a tree literal or it is not a valid Python list. 
Please check that you have provided a list of nodes and nested children of the following form to loadVisualization: literal_tree = [{ @@ -87,19 +115,81 @@ def loadVisualization(self, line): ) raise Exception("Bad argument") - # Get curent cell id - self.codeMap[name] = javascriptFile - - preRun = """ - // Grab current context - elementTop = element.get(0);""" - - displayObj.update(Javascript(preRun)) - - self.runVis(name, javascriptFile, path) - self.id_number += 1 + def _validate_boxplot(self, data): + STATS_SCHEMA = { + "type": "object", + "properties": { + "min": {"type": "number"}, + "max": {"type": "number"}, + "mean": {"type": "number"}, + "imb": {"type": "number"}, + "var": {"type": "number"}, + "kurt": {"type": "number"}, + "skew": {"type": "number"}, + "q": {"type": "array"}, + "ocat": {"type": "array"}, + "ometric": {"type": "array"}, + "nid": {"type": "string"}, + "node": {"type": "object"}, + }, + } + + if isinstance(data, dict): + callsites = data.keys() + for cs in callsites: + if isinstance(data[cs], dict): + boxplotTypes = data[cs].keys() + for boxplotType in boxplotTypes: + if boxplotType in ["tgt", "bgk"]: + for metric in data[cs][boxplotType]: + jsonschema.validate( + instance=data[cs][boxplotType][metric], + schema=STATS_SCHEMA, + ) + else: + self._print_exception_boxplot() + raise Exception( + "Incorrect boxplot type key provided. Use 'tgt' or 'bgk'." + ) + else: + self._print_exception_boxplot() + raise Exception("Bad argument.") + else: + self._print_exception_boxplot() + raise Exception("Bad argument.") + + def _print_exception_boxplot(self): + print( + """The argument is not a valid boxplot dictionary. 
Please check that + you have provided the data in the following form to + loadVisualization: + boxplot = { + "tgt" : { + "metric1": { + "min": number, + "max": number, + "mean": number, + "imb": number, + "kurt": number, + "skew": number, + "q": [q0, q1, q2, q3, q4], + "outliers: { + "values": array, + "keys": array + } + }, + "metric2": { + ... + } + }, + "bkg": { + // Refer "tgt" key. + } + } + """ + ) - def runVis(self, name, javascriptFile, path): + def runVis(self, name, javascriptFile, visType): name = "roundtripTreeVis" + str(self.id_number) javascriptExport = """ @@ -117,20 +207,27 @@ def runVis(self, name, javascriptFile, path): display(HTML(javascriptExport)) @line_magic - def fetchData(self, dest): + def fetchData(self, line): # added eval() to 'execute' the JS list-as-string as a Python list + # Get command line args for loading the vis. + args = line.split(" ") + visType = self.cleanLineArgument(args[0]) + dest = args[1] + hook = ( """ - var holder = jsNodeSelected; + var holder = """ + + VIS_TO_DATA[visType] + + """; holder = '"' + holder + '"'; + console.debug('""" + + str(dest) + + """ = '+ holder); IPython.notebook.kernel.execute('""" + str(dest) + """ = '+ eval(holder)); - //console.log('""" - + str(dest) - + """ = '+ holder); - """ + """ ) display(Javascript(hook)) diff --git a/hatchet/external/roundtrip/roundtripTree.js b/hatchet/external/roundtrip/roundtripTree.js index 11dfaa6f..3be6be17 100644 --- a/hatchet/external/roundtrip/roundtripTree.js +++ b/hatchet/external/roundtrip/roundtripTree.js @@ -1,9 +1,45 @@ //d3.v4 (function (element) { - require(['https://d3js.org/d3.v4.min.js'], function (d3) { - - d3.select(element).attr('width', '100%'); + const [roundtrip_path, visType, variableString] = cleanInputs(argList); + + // Quit if visType is not literal_tree. 
/**
 * Converts every single quote in each input string to a double quote, so a
 * Python-style literal such as "[{'name': 'foo'}]" becomes parseable as JSON.
 *
 * @param {Array<String>} strings Raw argument strings.
 * @returns {Array<String>} New array with the converted strings, in the same order.
 */
function cleanInputs(strings) {
    const toDoubleQuotes = (text) => text.replace(/'/g, '"');
    const cleaned = [];
    strings.forEach((text) => {
        cleaned.push(toDoubleQuotes(text));
    });
    return cleaned;
}
_data["forestData"].length; + _data["metricColumns"] = d3.keys(_data["forestData"][0].metrics); // pick the first metric listed to color the nodes _state.primaryMetric = _data.metricColumns[0]; diff --git a/requirements.txt b/requirements.txt index 0af39485..8e9ef8f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ PyYAML cython multiprocess textX +jsonschema caliper-reader