|
| 1 | +import __main__ |
| 2 | +import math |
| 3 | +import numpy as np |
| 4 | +import os |
| 5 | +import sys |
| 6 | +from scipy.optimize import curve_fit |
| 7 | +from scipy.stats import chisquare, binom_test, combine_pvalues, ttest_1samp, wilcoxon |
| 8 | +import itertools |
| 9 | +import time |
| 10 | +import queue |
| 11 | +import scipy |
| 12 | +import pulp |
| 13 | +import subprocess |
| 14 | +from minepy import MINE |
| 15 | +import random |
| 16 | + |
| 17 | +from AxProfUtil import extractConfigsFromDict |
| 18 | +from AxProfUtil import extractJobsFromConfigs |
| 19 | +from AxProfUtil import extractAllConfigs |
| 20 | +from AxProfUtil import writeDataToFile |
| 21 | +from AxProfUtil import dumpObtainedData |
| 22 | +from AxProfVis import visualizeOutput |
| 23 | +from AxProfGenerators import * #nothing but generators |
| 24 | + |
# Scratch files used to hand inputs/outputs to the program under test;
# checkProperties deletes them when the run completes.
defaultInputFileName = '_AxProf_input.txt'
defaultOutputFileName = '_AxProf_output.txt'
| 27 | + |
| 28 | + |
def checkDist(observed, expected, DDoF, pvalue=0.05):
    """Chi-squared goodness-of-fit check of observed vs. expected frequencies.

    Args:
        observed: list of observed counts, or dict mapping key -> count.
        expected: list of expected counts, or dict mapping key -> count.
            When both are dicts, the union of keys is used and missing
            keys count as 0.
        DDoF: delta degrees of freedom passed through to scipy's chisquare.
        pvalue: significance threshold (default 0.05).

    Returns:
        True if the distributions are statistically indistinguishable
        (p >= pvalue), False otherwise.
    """
    if isinstance(observed, dict) and isinstance(expected, dict):
        # Align the two dicts on the union of their keys.
        keys = set(observed) | set(expected)
        obslist = [observed.get(key, 0) for key in keys]
        explist = [expected.get(key, 0) for key in keys]
    else:
        obslist = observed
        explist = expected
    chisq, p = chisquare(obslist, explist, DDoF)
    return p >= pvalue
| 42 | + |
| 43 | + |
def checkFreq(observed, trials, expProb, pvalue=0.05, alternative='two-sided'):
    """Binomial test that an event's observed frequency matches expProb.

    Args:
        observed: number of successes observed.
        trials: number of trials performed.
        expProb: expected success probability.
        pvalue: significance threshold (default 0.05).
        alternative: 'two-sided', 'greater', or 'less'.

    Returns:
        True if the observed frequency is consistent with expProb
        (p >= pvalue), False otherwise.
    """
    # An impossible event that nonetheless occurred is an immediate failure.
    if observed > 0 and expProb == 0:
        return False
    # scipy.stats.binom_test was deprecated in 1.10 and removed in 1.12;
    # prefer binomtest, falling back for old SciPy versions.
    try:
        from scipy.stats import binomtest
        p = binomtest(observed, trials, expProb, alternative=alternative).pvalue
    except ImportError:
        p = binom_test(observed, trials, expProb, alternative)
    return p >= pvalue
| 49 | + |
| 50 | + |
def binomialTest(observed, trials, expProb, alternative='two-sided'):
    """Return the binomial-test p-value for the observed success count.

    Args:
        observed: number of successes observed.
        trials: number of trials performed.
        expProb: expected success probability.
        alternative: 'two-sided', 'greater', or 'less'.

    Returns:
        The p-value, or 0 when an event with expected probability 0
        was nonetheless observed.
    """
    if observed > 0 and expProb == 0:
        return 0
    # scipy.stats.binom_test was deprecated in 1.10 and removed in 1.12;
    # prefer binomtest, falling back for old SciPy versions.
    try:
        from scipy.stats import binomtest
        return binomtest(observed, trials, expProb, alternative=alternative).pvalue
    except ImportError:
        return binom_test(observed, trials, expProb, alternative)
| 55 | + |
| 56 | + |
def fitFuncToData(data, func, funcParams, paramNames):
    """Fit a model function to profiling data via least squares.

    Args:
        data: dict mapping a config tuple -> measured value.
        func: model callable taking (list of x-arrays, *coefficients).
        funcParams: names of the config parameters the model depends on.
        paramNames: ordered names of all config parameters (positions in
            the config tuples).

    Returns:
        (popt, r_sqd): fitted coefficients and the R^2 of the fit.
    """
    # Position of each model parameter inside a config tuple.
    col_idx = [paramNames.index(name) for name in funcParams]

    # One column per model parameter, plus the measured values.
    xcols = [[] for _ in funcParams]
    yvals = []
    for config, val in data.items():
        for slot, idx in zip(xcols, col_idx):
            slot.append(config[idx])
        yvals.append(val)

    xarrs = [np.array(col) for col in xcols]
    yarr = np.array(yvals)

    popt, _ = curve_fit(func, xarrs, yarr)

    # Coefficient of determination: 1 - SS_res / SS_tot.
    residuals = yarr - func(xarrs, *popt)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((yarr - np.mean(yarr)) ** 2)
    return popt, 1 - ss_res / ss_tot
| 79 | + |
| 80 | + |
def generateFunctionsFromSpec(spec):
    """Generate checker functions from an AxProf specification string.

    Writes the spec to /tmp/axprofspec, invokes the Java/ANTLR checker
    generator, and exec's the Python source it emits.

    Args:
        spec: the AxProf specification text.

    Returns:
        Dict namespace containing the generated checker functions.
        Exits the process (status 1) if the generator reports any error.
    """
    # The Java backend reads the spec from a file, so write it out first.
    # (Use a context manager so the handle is closed before java reads it.)
    with open("/tmp/axprofspec", "w") as tempSpecFile:
        tempSpecFile.write(spec)
    checkerGenPath = os.path.dirname(__file__)+'/checkerGen/'
    genCmd = ['java', '-ea', '-cp',
              checkerGenPath+'antlr-4.7.1-complete.jar:'+checkerGenPath,
              'MainClass', "/tmp/axprofspec"]
    pipes = subprocess.run(args=genCmd, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    out = pipes.stdout.decode("utf-8")
    err = pipes.stderr.decode("utf-8")
    # Any stderr output from the generator is treated as a fatal error.
    if err != "":
        print("Error while generating checker functions:")
        print(err)
        sys.exit(1)  # sys.exit: works even where the site builtin exit() is absent
    # Generated code refers to the profiled script via %FILENAME%;
    # substitute the main script's name (minus extension).
    scriptFile = '.'.join(__main__.__file__.split('.')[:-1])
    out = "from __main__ import *\n\n" + out
    out = out.replace("%FILENAME%", scriptFile)
    newFunctions = {}
    # NOTE(review): exec of generator output is trusted toolchain output;
    # never feed user-controlled specs from untrusted sources through here.
    exec(out, newFunctions)
    print("Generated following checker functions from spec:")
    print(out)
    return newFunctions
| 108 | + |
| 109 | + |
def checkProperties(configDict, runs, inputs, inputGen, inputGenParams, runner,
                    inpAgg=None, cfgAgg=None, perRunFunc=None, perInpFunc=None,
                    perConfigFunc=None, finalFunc=None, spec=None, skipAcc=False):
    """Run the profiled program over all configurations and check properties.

    Args:
        configDict: dict mapping parameter name -> list of values to sweep.
        runs: repeated runs per input, or None to auto-select a heuristic.
        inputs: generated inputs per configuration, or None to auto-select.
        inputGen: input generator function.
        inputGenParams: callable (configDict, inputNum) -> generator args.
        runner: callable (inputFileName, configDict) -> program output.
        inpAgg: aggregator across runs of one input (prev, run, output) -> agg.
        cfgAgg: aggregator across inputs of one config
            (prev, input_num, inpAggregate) -> agg.
        perRunFunc/perInpFunc/perConfigFunc/finalFunc: checker callbacks.
        spec: optional AxProf spec string; fills in any checkers left None.
        skipAcc: if True, skip accuracy checker callbacks (aggregation still
            runs so time/memory data is collected).
    """
    # Time and memory measurements of the program under test land here.
    if not os.path.isdir('outputs'):
        os.mkdir('outputs')
        print("Created 'outputs' directory for time and memory data")
    else:
        print("Using existing 'outputs' directory for time and memory data")

    if spec is not None:
        newFunctions = generateFunctionsFromSpec(spec)
        # Spec-generated functions only fill the hooks the caller left unset.
        if inpAgg is None:
            inpAgg = newFunctions.get('inpAgg')
        if cfgAgg is None:
            cfgAgg = newFunctions.get('cfgAgg')
        if perRunFunc is None:
            perRunFunc = newFunctions.get('perRunFunc')
        if perInpFunc is None:
            perInpFunc = newFunctions.get('perInpFunc')
        if perConfigFunc is None:
            perConfigFunc = newFunctions.get('perConfigFunc')
        if finalFunc is None:
            finalFunc = newFunctions.get('finalFunc')
        print(inpAgg, cfgAgg, perRunFunc, perInpFunc, perConfigFunc, finalFunc)
    else:
        print("No specification provided, using user-provided functions directly")

    if runs is None:
        # Heuristic sample sizes chosen for statistically meaningful checks.
        if (perRunFunc is None) and (perInpFunc is not None):
            runs = 200
        elif (perRunFunc is not None) and (perInpFunc is None):
            runs = 320
        else:
            runs = 320  # cannot decide, be conservative
        print("Selected no. of required runs:", runs)
    else:
        print("Using user-provided no. of runs:", runs)

    if inputs is None:
        if perConfigFunc is None:
            inputs = 1
        else:
            inputs = 200
        print("Selected no. of required inputs:", inputs)
    else:
        print("Using user-provided no. of inputs:", inputs)

    # Build a list of configurations to be tested
    paramNames, configList = extractConfigsFromDict(configDict)
    outputList = dict.fromkeys(configList)

    # Run each configuration and run the checker functions
    for config in configList:
        cfgAggregate = None
        thisConfigDict = {}
        for name in paramNames:
            thisConfigDict[name] = config[paramNames.index(name)]
        print("Running test program for configuration", thisConfigDict)

        for input_num in range(inputs):
            print("Input", input_num + 1)
            inpAggregate = None
            configIGParams = inputGenParams(thisConfigDict, input_num)
            inputData = inputGen(*configIGParams)
            writeDataToFile(inputData, defaultInputFileName)
            for run in range(runs):
                sys.stdout.write('.')  # progress indicator, one dot per run
                sys.stdout.flush()
                output = runner(defaultInputFileName, thisConfigDict)
                if perRunFunc:
                    if not skipAcc:
                        perRunFunc(thisConfigDict, inputData, output)
                if inpAgg:
                    inpAggregate = inpAgg(inpAggregate, run, output)
            sys.stdout.write('\n')
            sys.stdout.flush()
            if perInpFunc:
                if not skipAcc:
                    perInpFunc(thisConfigDict, inputData, runs, inpAggregate)
            if cfgAgg:
                cfgAggregate = cfgAgg(cfgAggregate, input_num, inpAggregate)
            elif inputs == 1:
                cfgAggregate = inpAggregate
            else:
                # Default aggregator: collect per-input aggregates in a list.
                # Fixed: the first aggregate was previously appended twice
                # (seeded as [inpAggregate] and then appended unconditionally).
                if cfgAggregate is None:
                    cfgAggregate = [inpAggregate]
                else:
                    cfgAggregate.append(inpAggregate)
        if perConfigFunc:
            if not skipAcc:
                perConfigFunc(thisConfigDict, runs, inputs, cfgAggregate)
        outputList[config] = cfgAggregate
    if finalFunc:
        finalFunc(paramNames, outputList, runs, inputs)
    os.system("rm -f {} {} _axprof_temp_input".format(defaultInputFileName, defaultOutputFileName))
| 207 | + |
| 208 | + |
def selectInputFeatures(configs, inputGenerator, igparams,
                        tunedFeatures, error_function, runner, num_runs=5):
    """Identify which tuned input features actually influence accuracy.

    First tests whether permuting the input data changes the error, then
    sweeps each feature in tunedFeatures and computes the MIC between the
    feature's values and the mean error. Features with MIC < 0.9 are pinned
    to a single random value in the returned config dict.

    Args:
        configs: dict mapping parameter name -> list of candidate values
            (mutated in place for unimportant features).
        inputGenerator: input generator function.
        igparams: callable (configDict, inputNum) -> generator args.
        tunedFeatures: parameter names to evaluate for importance.
        error_function: callable (inputData, accOutput) -> error value.
        runner: callable (inputFileName, configDict) -> dict with 'acc' key.
        num_runs: runs averaged per measurement (default 5).

    Returns:
        configs, with unimportant features narrowed to one value.
    """
    permutation = [1, 2, 3, 4, 5]

    print('Starting the input feature selection process')
    # Build a set of configs that only change input parameters.
    # For the remaining parameters choose one value at random.
    newConfigs = {}
    for key in configs.keys():
        if key in tunedFeatures:
            newConfigs[key] = configs[key]
        else:
            newConfigs[key] = [random.choice(configs[key])]
    configList = extractAllConfigs(newConfigs)

    # Fixed: the feature sweep below uses 5 inputs per config, so the
    # reported estimate must include that factor.
    tot_runs = num_runs * len(permutation) + 5 * num_runs * len(configList)
    print('Requires {} executions'.format(tot_runs))

    # Does permuting the data affect accuracy?
    result_set = []
    perm_feat = False
    tmp_config = random.choice(configList)._asdict()
    for perm in permutation:
        configIGParams = igparams(tmp_config, 0)
        inputData = inputGenerator(*configIGParams)
        new_inputs = inputData.copy()
        random.shuffle(new_inputs)
        writeDataToFile(new_inputs, "_axprof_temp_input")

        # Averaging over a set of runs.
        error_tot = 0
        for run in range(num_runs):
            results = runner("_axprof_temp_input", tmp_config)
            error_tot += error_function(new_inputs, results['acc'])
            sys.stdout.write('.')
            sys.stdout.flush()
        result_set.append(error_tot / num_runs)
    mine = MINE()
    mine.compute_score(permutation, result_set)
    perm_mic = mine.mic()
    if perm_mic > 0.9:
        perm_feat = True

    # Testing the other features
    result_set = {}
    for config in configList:
        config_dict = config._asdict()
        # Setting the number of inputs to a low value for now
        for input_num in range(5):
            configIGParams = igparams(config_dict, input_num)
            inputData = inputGenerator(*configIGParams)
            # Fixed: write and score the input generated for THIS config,
            # not the stale shuffled input left over from the permutation test.
            writeDataToFile(inputData, "_axprof_temp_input")
            error_tot = 0
            for run in range(num_runs):
                sys.stdout.write('.')
                sys.stdout.flush()
                # Fixed: run with the config being swept, not tmp_config.
                results = runner("_axprof_temp_input", config_dict)
                error_tot += error_function(inputData, results['acc'])
            result_set[config] = error_tot / num_runs

    sys.stdout.write('\n')
    sys.stdout.flush()

    # MIC between each tuned feature's values and the mean observed error.
    mics = {}
    for key in tunedFeatures:
        agg_y = {}
        for config in result_set:
            config_dict = config._asdict()
            if config_dict[key] in agg_y:
                agg_y[config_dict[key]].append(result_set[config])
            else:
                agg_y[config_dict[key]] = [result_set[config]]

        unique_x = list(agg_y.keys())
        y = []
        for x in unique_x:
            y.append(np.mean(agg_y[x]))
        mine = MINE()
        mine.compute_score(unique_x, y)
        mics[key] = mine.mic()

    # Removing the variations in features that are not important
    for key in tunedFeatures:
        if mics[key] < 0.9:
            current = configs[key]
            configs[key] = [random.choice(current)]

    # Printing the report
    print('----------------------------------------')
    print('The results of input feature selection: ')
    print("Permuting the input: (MIC: {})".format(perm_mic))
    for key in tunedFeatures:
        print("{}: (MIC: {})".format(key, mics[key]))
    print("Updated config list: ", configs)
    print('----------------------------------------')
    return configs
0 commit comments