Skip to content

Commit 2d9801a

Browse files
committed
initial commit - set up repo
0 parents  commit 2d9801a

17 files changed

+2292
-0
lines changed

AxProf/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__pycache__

AxProf/AxProf.py

+305
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
import __main__
2+
import math
3+
import numpy as np
4+
import os
5+
import sys
6+
from scipy.optimize import curve_fit
7+
from scipy.stats import chisquare, binom_test, combine_pvalues, ttest_1samp, wilcoxon
8+
import itertools
9+
import time
10+
import queue
11+
import scipy
12+
import pulp
13+
import subprocess
14+
from minepy import MINE
15+
import random
16+
17+
from AxProfUtil import extractConfigsFromDict
18+
from AxProfUtil import extractJobsFromConfigs
19+
from AxProfUtil import extractAllConfigs
20+
from AxProfUtil import writeDataToFile
21+
from AxProfUtil import dumpObtainedData
22+
from AxProfVis import visualizeOutput
23+
from AxProfGenerators import * #nothing but generators
24+
25+
# Name of the file that generated input data is written to before the runner is invoked.
defaultInputFileName = '_AxProf_input.txt'
26+
# Default output file name; in this module it is only referenced by the
# end-of-run cleanup in checkProperties.
defaultOutputFileName = '_AxProf_output.txt'
27+
28+
29+
def checkDist(observed, expected, DDoF, pvalue=0.05):
    """Compare observed frequencies with expected ones using a chi-square test.

    Args:
        observed: Observed frequencies, either a dict (or dict subclass such
            as ``collections.Counter``) mapping category -> count, or a
            sequence of counts.
        expected: Expected frequencies, in the same form as ``observed``.
        DDoF: Forwarded to ``scipy.stats.chisquare`` as its ``ddof`` argument.
        pvalue: Threshold the test's p-value is compared against.

    Returns:
        Truthy when the p-value is at least ``pvalue``, falsy otherwise.
    """
    # isinstance (rather than an exact type comparison) so that Counter,
    # defaultdict and OrderedDict inputs are aligned by key as well.
    if isinstance(observed, dict) and isinstance(expected, dict):
        # A category missing on either side counts as frequency 0.
        keys = set(observed) | set(expected)
        obslist = [observed.get(key, 0) for key in keys]
        explist = [expected.get(key, 0) for key in keys]
    else:
        obslist = observed
        explist = expected
    _, p = chisquare(obslist, f_exp=explist, ddof=DDoF)
    return p >= pvalue
42+
43+
44+
def checkFreq(observed, trials, expProb, pvalue=0.05, alternative='two-sided'):
    """Check an observed success count against an expected probability.

    Args:
        observed: Number of successes observed.
        trials: Number of trials performed.
        expProb: Expected probability of success in a single trial.
        pvalue: Threshold the binomial test's p-value is compared against.
        alternative: Alternative hypothesis name forwarded to SciPy.

    Returns:
        False when a success was observed although ``expProb`` is 0;
        otherwise truthy when the binomial test's p-value is at least
        ``pvalue`` and falsy when it is below.
    """
    # With an expected probability of exactly 0 any observed success is a
    # violation; this is decided without running the statistical test.
    if observed > 0 and expProb == 0:
        return False
    # NOTE(review): the module-level import of ``binom_test`` needs the same
    # guarding for this module to load on SciPy versions that removed it.
    try:
        from scipy.stats import binomtest
    except ImportError:
        # Older SciPy releases only provide the legacy function.
        from scipy.stats import binom_test
        p = binom_test(observed, trials, expProb, alternative)
    else:
        # binomtest validates that the counts are integers, hence int().
        p = binomtest(int(observed), int(trials), expProb,
                      alternative=alternative).pvalue
    return p >= pvalue
49+
50+
51+
def binomialTest(observed, trials, expProb, alternative='two-sided'):
    """Return the p-value of a binomial test.

    Args:
        observed: Number of successes observed.
        trials: Number of trials performed.
        expProb: Expected probability of success in a single trial.
        alternative: Alternative hypothesis name forwarded to SciPy.

    Returns:
        0 when a success was observed although ``expProb`` is 0; otherwise
        the p-value computed by SciPy's binomial test.
    """
    # A success under an expected probability of 0 is reported as p = 0
    # without running the statistical test.
    if observed > 0 and expProb == 0:
        return 0
    # NOTE(review): the module-level import of ``binom_test`` needs the same
    # guarding for this module to load on SciPy versions that removed it.
    try:
        from scipy.stats import binomtest
    except ImportError:
        # Older SciPy releases only provide the legacy function.
        from scipy.stats import binom_test
        return binom_test(observed, trials, expProb, alternative)
    # binomtest validates that the counts are integers, hence int().
    return binomtest(int(observed), int(trials), expProb,
                     alternative=alternative).pvalue
55+
56+
57+
def fitFuncToData(data, func, funcParams, paramNames):
    """Fit ``func`` to measured values and report the goodness of fit.

    Args:
        data: Dict mapping a configuration (indexable by position) to the
            value measured for it.
        func: Model function, called as ``func(xdata, *params)``.
        funcParams: Names of the configuration parameters used as the
            independent variables, in the order ``func`` expects them.
        paramNames: Names of all configuration parameters; a name's position
            here is its position inside each configuration.

    Returns:
        ``(popt, r_sqd)``: the fitted parameters from ``curve_fit`` and the
        coefficient of determination of the fit.
    """
    # One column of independent-variable values per requested parameter.
    xColumns = [
        np.array([cfg[paramNames.index(name)] for cfg in data])
        for name in funcParams
    ]
    yValues = np.array(list(data.values()))

    popt, _ = curve_fit(func, xColumns, yValues)

    # R^2 = 1 - SS_res / SS_tot
    predicted = func(xColumns, *popt)
    ss_res = np.sum((yValues - predicted) ** 2)
    ss_tot = np.sum((yValues - np.mean(yValues)) ** 2)
    return popt, 1 - ss_res / ss_tot
79+
80+
81+
def generateFunctionsFromSpec(spec):
    """Generate checker functions from a specification string.

    The specification is written to a temporary file and handed to the Java
    generator (``MainClass``) located in the ``checkerGen`` directory next to
    this module. The Python source the generator prints on stdout is executed
    and the resulting namespace is returned.

    Args:
        spec: Text of the specification.

    Returns:
        Dict holding the namespace produced by executing the generated code.

    Raises:
        SystemExit: With status 1 when the generator writes anything to
            stderr.
    """
    import tempfile

    checkerGenPath = os.path.dirname(__file__) + '/checkerGen/'

    # Use a unique temporary file instead of a fixed, predictable path so that
    # concurrent runs cannot clobber each other, and always remove it.
    fd, specPath = tempfile.mkstemp(prefix='axprofspec')
    try:
        with os.fdopen(fd, 'w') as tempSpecFile:
            tempSpecFile.write(spec)
        genCmd = ['java', '-ea', '-cp',
                  # os.pathsep is ':' on POSIX and ';' on Windows.
                  checkerGenPath + 'antlr-4.7.1-complete.jar' + os.pathsep + checkerGenPath,
                  'MainClass', specPath]
        pipes = subprocess.run(args=genCmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    finally:
        os.remove(specPath)

    out = pipes.stdout.decode("utf-8")
    err = pipes.stderr.decode("utf-8")
    if err != "":
        print("Error while generating checker functions:")
        print(err)
        sys.exit(1)

    # Name of the launching script with its last dotted suffix stripped.
    scriptFile = '.'.join(__main__.__file__.split('.')[:-1])
    # Give the generated code access to everything the user script defines.
    out = "from __main__ import *\n\n" + out
    out = out.replace("%FILENAME%", scriptFile)
    newFunctions = {}
    # NOTE(review): this executes generator output verbatim; the spec must
    # come from a trusted source.
    exec(out, newFunctions)
    print("Generated following checker functions from spec:")
    print(out)
    return newFunctions
108+
109+
110+
def checkProperties(configDict, runs, inputs, inputGen, inputGenParams, runner,
                    inpAgg=None, cfgAgg=None, perRunFunc=None, perInpFunc=None,
                    perConfigFunc=None, finalFunc=None, spec=None, skipAcc=False):
    """Run the tested program for every configuration and invoke the checkers.

    For each configuration, ``inputs`` inputs are generated and the runner is
    executed ``runs`` times per input. Outputs are folded through the
    aggregators and handed to the checker callbacks at run, input,
    configuration and final granularity.

    Args:
        configDict: Passed to ``extractConfigsFromDict`` to obtain the
            parameter names and the list of configurations.
        runs: Runs per input; when None a default is chosen from the checkers
            that are present.
        inputs: Inputs per configuration; when None, 1 is used if there is no
            ``perConfigFunc`` and 200 otherwise.
        inputGen: Called as ``inputGen(*params)`` to create the input data.
        inputGenParams: Called as ``inputGenParams(configDict, input_num)``;
            its result is unpacked into ``inputGen``.
        runner: Called as ``runner(inputFileName, configDict)``; returns the
            output of one run.
        inpAgg: Optional ``inpAgg(aggregate, run, output)`` folding run
            outputs into a per-input aggregate.
        cfgAgg: Optional ``cfgAgg(aggregate, input_num, inpAggregate)``
            folding per-input aggregates into a per-configuration aggregate.
        perRunFunc: Optional ``perRunFunc(configDict, inputData, output)``.
        perInpFunc: Optional
            ``perInpFunc(configDict, inputData, runs, inpAggregate)``.
        perConfigFunc: Optional
            ``perConfigFunc(configDict, runs, inputs, cfgAggregate)``.
        finalFunc: Optional
            ``finalFunc(paramNames, outputList, runs, inputs)``.
        spec: Optional specification text; callbacks generated from it fill
            in whichever of the six callbacks above were left as None.
        skipAcc: When true, the per-run, per-input and per-configuration
            checkers are not called (aggregators and ``finalFunc`` still are).

    Returns:
        None.
    """
    if not os.path.isdir('outputs'):
        os.mkdir('outputs')
        print("Created 'outputs' directory for time and memory data")
    else:
        print("Using existing 'outputs' directory for time and memory data")

    if spec is not None:
        newFunctions = generateFunctionsFromSpec(spec)
        # Explicitly supplied callbacks win; generated ones only fill gaps.
        if inpAgg is None:
            inpAgg = newFunctions.get('inpAgg')
        if cfgAgg is None:
            cfgAgg = newFunctions.get('cfgAgg')
        if perRunFunc is None:
            perRunFunc = newFunctions.get('perRunFunc')
        if perInpFunc is None:
            perInpFunc = newFunctions.get('perInpFunc')
        if perConfigFunc is None:
            perConfigFunc = newFunctions.get('perConfigFunc')
        if finalFunc is None:
            finalFunc = newFunctions.get('finalFunc')
        print(inpAgg, cfgAgg, perRunFunc, perInpFunc, perConfigFunc, finalFunc)
    else:
        print("No specification provided, using user-provided functions directly")

    if runs is None:
        if (perRunFunc is None) and (perInpFunc is not None):
            runs = 200
        else:
            # Either only a per-run checker exists, or the situation is
            # ambiguous; use the larger (conservative) count.
            runs = 320
        print("Selected no. of required runs:", runs)
    else:
        print("Using user-provided no. of runs:", runs)

    if inputs is None:
        inputs = 1 if perConfigFunc is None else 200
        print("Selected no. of required inputs:", inputs)
    else:
        print("Using user-provided no. of inputs:", inputs)

    # Build a list of configurations to be tested
    paramNames, configList = extractConfigsFromDict(configDict)
    outputList = dict.fromkeys(configList)

    # Run each configuration and run the checker functions
    for config in configList:
        cfgAggregate = None
        # Configurations are positional; pair each value with its name.
        thisConfigDict = dict(zip(paramNames, config))
        print("Running test program for configuration", thisConfigDict)

        for input_num in range(inputs):
            print("Input", input_num + 1)
            inpAggregate = None
            configIGParams = inputGenParams(thisConfigDict, input_num)
            inputData = inputGen(*configIGParams)
            writeDataToFile(inputData, defaultInputFileName)

            for run in range(runs):
                # Progress indicator: one dot per run.
                sys.stdout.write('.')
                sys.stdout.flush()
                output = runner(defaultInputFileName, thisConfigDict)
                if perRunFunc and not skipAcc:
                    perRunFunc(thisConfigDict, inputData, output)
                if inpAgg:
                    inpAggregate = inpAgg(inpAggregate, run, output)
            sys.stdout.write('\n')
            sys.stdout.flush()

            if perInpFunc and not skipAcc:
                perInpFunc(thisConfigDict, inputData, runs, inpAggregate)

            if cfgAgg:
                cfgAggregate = cfgAgg(cfgAggregate, input_num, inpAggregate)
            elif inputs == 1:
                cfgAggregate = inpAggregate
            else:
                # Default configuration aggregator: collect the per-input
                # aggregates in a list. Start from an empty list so the first
                # input is recorded exactly once.
                if cfgAggregate is None:
                    cfgAggregate = []
                cfgAggregate.append(inpAggregate)

        if perConfigFunc and not skipAcc:
            perConfigFunc(thisConfigDict, runs, inputs, cfgAggregate)
        outputList[config] = cfgAggregate

    if finalFunc:
        finalFunc(paramNames, outputList, runs, inputs)

    # Remove the scratch files without spawning a shell; a missing file is
    # not an error.
    for leftover in (defaultInputFileName, defaultOutputFileName,
                     '_axprof_temp_input'):
        try:
            os.remove(leftover)
        except FileNotFoundError:
            pass
207+
208+
209+
def selectInputFeatures(configs, inputGenerator, igparams,
                        tunedFeatures, error_function, runner, num_runs=5):
    """Prune input features whose value does not appear to affect the error.

    First measures the error on several shuffled copies of one input, then
    measures the average error for every combination of the tuned features.
    For each tuned feature a MIC score (minepy ``MINE``) between the feature
    value and the mean error is computed; features scoring below 0.9 are
    collapsed to a single randomly chosen value in ``configs``.

    Args:
        configs: Dict mapping parameter name -> list of candidate values.
            It is modified in place and also returned.
        inputGenerator: Called as ``inputGenerator(*params)``; must return an
            object with ``copy()`` that ``random.shuffle`` can shuffle.
        igparams: Called as ``igparams(configDict, input_num)``; its result is
            unpacked into ``inputGenerator``.
        tunedFeatures: Names of the parameters to evaluate.
        error_function: Called as ``error_function(inputData, acc)`` and
            returns a number.
        runner: Called as ``runner(inputFileName, configDict)``; its result
            is indexed with ``'acc'``.
        num_runs: Number of runs averaged per measurement.

    Returns:
        The updated ``configs`` dict.
    """
    permutation = [1, 2, 3, 4, 5]
    # Setting the number of inputs per configuration to a low value for now
    inputs_per_config = 5
    temp_input_file = "_axprof_temp_input"

    print('Starting the input feature selection process')
    # Build a set of configs that only change input parameters.
    # For the remaining parameters choose at random.
    newConfigs = {}
    for key in configs:
        if key in tunedFeatures:
            newConfigs[key] = configs[key]
        else:
            newConfigs[key] = [random.choice(configs[key])]
    configList = extractAllConfigs(newConfigs)

    # Count every execution, including the several inputs per configuration.
    tot_runs = (num_runs * len(permutation)
                + num_runs * inputs_per_config * len(configList))
    print('Requires {} executions'.format(tot_runs))

    # Does permuting the data affect accuracy?
    perm_errors = []
    tmp_config = random.choice(configList)._asdict()
    for _ in permutation:
        configIGParams = igparams(tmp_config, 0)
        inputData = inputGenerator(*configIGParams)
        new_inputs = inputData.copy()
        random.shuffle(new_inputs)
        writeDataToFile(new_inputs, temp_input_file)

        # Averaging over a set of runs.
        error_tot = 0
        for run in range(num_runs):
            results = runner(temp_input_file, tmp_config)
            error_tot += error_function(new_inputs, results['acc'])
            sys.stdout.write('.')
            sys.stdout.flush()
        perm_errors.append(error_tot / num_runs)
    mine = MINE()
    mine.compute_score(permutation, perm_errors)
    perm_mic = mine.mic()

    # Testing the other features
    result_set = {}
    for config in configList:
        config_dict = config._asdict()
        error_tot = 0
        for input_num in range(inputs_per_config):
            configIGParams = igparams(config_dict, input_num)
            inputData = inputGenerator(*configIGParams)
            # Evaluate the input generated for THIS configuration (not the
            # shuffled data left over from the permutation phase), and run
            # the program with this configuration.
            writeDataToFile(inputData, temp_input_file)
            for run in range(num_runs):
                sys.stdout.write('.')
                sys.stdout.flush()
                results = runner(temp_input_file, config_dict)
                error_tot += error_function(inputData, results['acc'])
        # Average over all inputs and runs of this configuration.
        result_set[config] = error_tot / (inputs_per_config * num_runs)

    sys.stdout.write('\n')
    sys.stdout.flush()

    mics = {}
    for key in tunedFeatures:
        # Group the per-configuration errors by the value of this feature.
        agg_y = {}
        for config, error in result_set.items():
            agg_y.setdefault(config._asdict()[key], []).append(error)

        unique_x = list(agg_y.keys())
        y = [np.mean(agg_y[x]) for x in unique_x]
        mine = MINE()
        mine.compute_score(unique_x, y)
        mics[key] = mine.mic()

    # Removing the variations in features that are not important
    for key in tunedFeatures:
        if mics[key] < 0.9:
            configs[key] = [random.choice(configs[key])]

    # Printing the report
    print('----------------------------------------')
    print('The results of input feature selection: ')
    print("Permuting the input: (MIC: {})".format(perm_mic))
    for key in tunedFeatures:
        print("{}: (MIC: {})".format(key, mics[key]))
    print("Updated config list: ", configs)
    print('----------------------------------------')
    return configs

AxProf/AxProfGenerators.py

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# data generators for AxProf
2+
3+
import numpy as np
4+
import random
5+
import math
6+
from AxProfUtil import writeDataToFile
7+
import scipy
8+
9+
10+
# Generates numbers of the form a*i+b for i in [0..length)
11+
def linearGenerator(length, a, b):
    """Return the list of ``a * i + b`` for ``i`` in ``range(length)``."""
    return [a * i + b for i in range(length)]
16+
17+
18+
# Generates uniformly chosen numbers within [_min,_max)
19+
def uniformGenerator(length, _min, _max, seed=None):
    """Return ``length`` floats drawn with ``random.uniform(_min, _max)``.

    The module-level ``random`` generator is re-seeded with ``seed`` first.
    """
    random.seed(seed)
    samples = []
    for _ in range(length):
        samples.append(random.uniform(_min, _max))
    return samples
22+
23+
24+
# Samples WITHOUT REPLACEMENT integers within [_min,_max]
25+
def distinctIntegerGenerator(length, _min, _max, seed=None):
    """Return ``length`` distinct integers sampled from ``_min.._max`` inclusive.

    The module-level ``random`` generator is re-seeded with ``seed`` first.
    """
    random.seed(seed)
    candidates = range(_min, _max + 1)
    return random.sample(candidates, length)
28+
29+
30+
# Generates points in any number of dimensions
31+
# dims specifies number of dimensions
32+
# minCoord and maxCoord specify minimum and maximum values for each coordinate
33+
def pointsGenerator(length, dims, minCoord, maxCoord, seed=None):
    """Return ``length`` points, each a list of ``dims`` integer coordinates.

    Every coordinate is drawn with ``random.randrange(minCoord, maxCoord)``,
    so ``maxCoord`` itself is never produced. The module-level ``random``
    generator is re-seeded with ``seed`` first.
    """
    random.seed(seed)
    return [
        [random.randrange(minCoord, maxCoord) for _ in range(dims)]
        for _ in range(length)
    ]
41+
42+
43+
# Generates integers in a zipf distribution with the given skew
44+
# Keeps numbers below 2^31-1 to prevent overflow in C-style ints
45+
def zipfGenerator(length, skew, seed=None):
    """Return ``length`` Zipf-distributed integers, each below 2**31 - 1.

    NumPy's global generator is re-seeded with ``seed``; values are drawn one
    at a time and any draw that is not below the limit is discarded.
    """
    np.random.seed(seed)
    limit = 2147483647
    accepted = []
    while len(accepted) < length:
        candidate = np.random.zipf(skew, 1)[0]
        if candidate >= limit:
            # Too large for a C-style 32-bit int; draw again.
            continue
        accepted.append(candidate)
    return accepted
55+
56+
57+
# Generates multiple matrices with the given specification list
58+
# specs is a list of 3 tuples: (NUM,ROWS,COLS)
59+
# each tuple generates NUM matrices of size ROWSxCOLS
60+
def matrixGenerator(specs):
    """Return random matrices built from a list of ``(NUM, ROWS, COLS)`` tuples.

    Each tuple contributes ``NUM`` matrices of shape ``ROWS x COLS`` created
    with ``np.random.rand``, appended in the order the tuples are given.
    """
    matrices = []
    for count, nrows, ncols in specs:
        matrices.extend(np.random.rand(nrows, ncols) for _ in range(count))
    return matrices
67+
68+
69+
# Generates a single matrix of size l*m and flattens it into a list
70+
def flattenedMatrixGenerator(l, m):
    """Return the entries of a random ``l x m`` matrix as a flat list.

    The matrix comes from ``np.random.rand`` and is flattened in row-major
    order.
    """
    matrix = np.random.rand(l, m)
    return list(matrix.flatten())
73+
74+
75+
# Does not generate meaningful input;
76+
# used when the script provides its own external input
77+
def dummyGenerator(x):
    """Return ``x`` wrapped in a one-element list."""
    placeholder = [x]
    return placeholder

0 commit comments

Comments
 (0)