Skip to content

Commit 62e42b0

Browse files
authored
Merge pull request #39 from secure-software-engineering/meka
Add Multi-label ML approach using MEKA
2 parents (8abba7a + c54207e), commit 62e42b0

22 files changed

+1081 / -625 lines changed

swan-pipeline/pom.xml

Lines changed: 331 additions & 321 deletions
Large diffs are not rendered by default.

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/SwanPipeline.java

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
package de.fraunhofer.iem.swan;
22

3-
import com.fasterxml.jackson.databind.ObjectMapper;
43
import de.fraunhofer.iem.swan.cli.SwanOptions;
5-
import de.fraunhofer.iem.swan.features.FeaturesHandler;
4+
import de.fraunhofer.iem.swan.features.FeatureSetSelector;
5+
import de.fraunhofer.iem.swan.features.IFeatureSet;
66
import de.fraunhofer.iem.swan.features.code.soot.SourceFileLoader;
77
import de.fraunhofer.iem.swan.io.dataset.SrmList;
88
import de.fraunhofer.iem.swan.io.dataset.SrmListUtils;
99
import de.fraunhofer.iem.swan.model.ModelEvaluator;
1010
import de.fraunhofer.iem.swan.util.Util;
1111
import org.slf4j.Logger;
1212
import org.slf4j.LoggerFactory;
13-
import java.io.File;
13+
1414
import java.io.IOException;
1515

1616
/**
@@ -48,19 +48,13 @@ public void run() throws IOException, InterruptedException {
4848
testDataset.load(dataset.getMethods());
4949

5050
//Initialize and populate features
51-
FeaturesHandler featuresHandler = new FeaturesHandler(dataset, testDataset, options);
52-
featuresHandler.createFeatures();
51+
FeatureSetSelector featureSetSelector = new FeatureSetSelector();
52+
IFeatureSet featureSet = featureSetSelector.select(dataset, testDataset, options);
5353

5454
//Train and evaluate model for SRM and CWE categories
55-
ModelEvaluator modelEvaluator = new ModelEvaluator(featuresHandler, options);
55+
ModelEvaluator modelEvaluator = new ModelEvaluator(featureSet, options, testDataset.getMethods());
5656
modelEvaluator.trainModel();
5757

58-
//TODO export final list to JSON file
59-
String outputFile = options.getOutputDir() + File.separator + "swan-srm-cwe-list.json";
60-
ObjectMapper objectMapper = new ObjectMapper();
61-
objectMapper.writeValue(new File(outputFile), dataset);
62-
logger.info("SRM/CWE list exported to {}", outputFile);
63-
6458
long analysisTime = System.currentTimeMillis() - startAnalysisTime;
6559
logger.info("Total runtime {} minutes", analysisTime / 60000);
6660
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/cli/CliRunner.java

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,8 @@ public class CliRunner implements Callable<Integer> {
2525
@CommandLine.Option(names = {"-f", "--feature"}, description = {"Select one or more feature sets: all, code, doc-auto or doc-manual"})
2626
private List<String> featureSet = Collections.singletonList("code");
2727

28-
@CommandLine.Option(names = {"-l", "--learning"}, description = {"Learning modes: manual, auto"})
29-
private String learningMode = "manual";
28+
@CommandLine.Option(names = {"-t", "--toolkit"}, description = {"ML toolkit: meka, weka, ml-plan"})
29+
private String toolkit = "meka";
3030

3131
@CommandLine.Option(names = {"-s", "--srm"}, description = {"SRM: all, source, sink, sanitizer, authentication, relevant"})
3232
private List<String> srmClasses = Collections.singletonList("all");
@@ -46,6 +46,13 @@ public class CliRunner implements Callable<Integer> {
4646
@CommandLine.Option(names = {"-sp", "--training-split"}, description = {"Percentage for training"})
4747
private double split = 0.7;
4848

49+
@CommandLine.Option(names = {"-p", "--phase"}, description = {"Phase: validate, predict"})
50+
private String phase = "predict";
51+
52+
@CommandLine.Option(names = {"-pt", "--prediction-threshold"}, description = {"Threshold for predicting categories"})
53+
private double predictionThreshold = 0.5;
54+
55+
4956
@Override
5057
public Integer call() throws Exception {
5158

@@ -54,13 +61,15 @@ public Integer call() throws Exception {
5461
datasetJson,
5562
outputDir,
5663
featureSet,
57-
learningMode,
64+
toolkit,
5865
srmClasses,
5966
cweClasses,
6067
exportArffData,
6168
isDocumented,
6269
iterations,
63-
split);
70+
split,
71+
phase);
72+
options.setPredictionThreshold(predictionThreshold);
6473

6574
return new SwanCli().run(options);
6675
}

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/cli/SwanCli.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ public Integer run(SwanOptions options) throws Exception {
3131
}
3232

3333
if(options.getSrmClasses().contains("all")){
34-
options.setSrmClasses(Arrays.asList("source", "sink", "sanitizer", "authentication", "relevant"));
34+
options.setSrmClasses(Arrays.asList("source", "sink", "sanitizer", "authentication"));
3535
}
3636

3737
if(options.getCweClasses().contains("all")){

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/cli/SwanOptions.java

Lines changed: 45 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -15,23 +15,44 @@ public class SwanOptions {
1515
private String datasetJson;
1616
private String outputDir;
1717
private List<String> featureSet;
18-
private String learningMode;
18+
private String toolkit;
1919
private List<String> srmClasses;
2020
private List<String> cweClasses;
2121
private boolean exportArffData;
2222
private boolean isDocumented;
2323
private int iterations;
2424
private double trainTestSplit;
25+
private String phase;
26+
private double predictionThreshold;
2527

2628
public SwanOptions(String testDataDir, String trainDataDir, String datasetJson, String outputDir,
27-
List<String> featureSet, String learningMode, List<String> srmClasses, List<String> cweClasses,
29+
List<String> featureSet, String toolkit, List<String> srmClasses,
30+
List<String> cweClasses, boolean exportArffData, boolean isDocumented,
31+
int iterations, double trainTestSplit, String phase) {
32+
this.testDataDir = testDataDir;
33+
this.trainDataDir = trainDataDir;
34+
this.datasetJson = datasetJson;
35+
this.outputDir = outputDir;
36+
this.featureSet = featureSet;
37+
this.toolkit = toolkit;
38+
this.srmClasses = srmClasses;
39+
this.cweClasses = cweClasses;
40+
this.exportArffData = exportArffData;
41+
this.isDocumented = isDocumented;
42+
this.iterations = iterations;
43+
this.trainTestSplit = trainTestSplit;
44+
this.phase = phase;
45+
}
46+
47+
public SwanOptions(String testDataDir, String trainDataDir, String datasetJson, String outputDir,
48+
List<String> featureSet, String toolkit, List<String> srmClasses, List<String> cweClasses,
2849
boolean exportArffData, boolean isDocumented, int iterations, double trainTestSplit) {
2950
this.testDataDir = testDataDir;
3051
this.trainDataDir = trainDataDir;
3152
this.datasetJson = datasetJson;
3253
this.outputDir = outputDir;
3354
this.featureSet = featureSet;
34-
this.learningMode = learningMode;
55+
this.toolkit = toolkit;
3556
this.srmClasses = srmClasses;
3657
this.cweClasses = cweClasses;
3758
this.exportArffData = exportArffData;
@@ -80,12 +101,12 @@ public void setFeatureSet(List<String> featureSet) {
80101
this.featureSet = featureSet;
81102
}
82103

83-
public String getLearningMode() {
84-
return learningMode;
104+
public String getToolkit() {
105+
return toolkit;
85106
}
86107

87-
public void setLearningMode(String learningMode) {
88-
this.learningMode = learningMode;
108+
public void setToolkit(String toolkit) {
109+
this.toolkit = toolkit;
89110
}
90111

91112
public List<String> getSrmClasses() {
@@ -140,6 +161,22 @@ public void setTrainTestSplit(double trainTestSplit) {
140161
this.trainTestSplit = trainTestSplit;
141162
}
142163

164+
public String getPhase() {
165+
return phase;
166+
}
167+
168+
public void setPhase(String phase) {
169+
this.phase = phase;
170+
}
171+
172+
public double getPredictionThreshold() {
173+
return predictionThreshold;
174+
}
175+
176+
public void setPredictionThreshold(double predictionThreshold) {
177+
this.predictionThreshold = predictionThreshold;
178+
}
179+
143180
@Override
144181
public String toString() {
145182
return "SwanOptions{" +
@@ -148,7 +185,7 @@ public String toString() {
148185
", datasetJson='" + datasetJson + '\'' +
149186
", outputDir='" + outputDir + '\'' +
150187
", featureSet='" + featureSet + '\'' +
151-
", learningMode='" + learningMode + '\'' +
188+
", learningMode='" + toolkit + '\'' +
152189
", srmClasses=" + srmClasses +
153190
", cweClasses=" + cweClasses +
154191
", exportArffData=" + exportArffData +

swan-pipeline/src/main/java/de/fraunhofer/iem/swan/data/Javadoc.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11

22
package de.fraunhofer.iem.swan.data;
33

4+
import com.fasterxml.jackson.annotation.JsonIgnore;
45
import com.fasterxml.jackson.annotation.JsonProperty;
56

67
/**
@@ -42,9 +43,11 @@ public void setClassComment(String classComment) {
4243
this.classComment = classComment;
4344
}
4445

46+
@JsonIgnore
4547
public String getMergedComments() {
4648
return methodComment + " " + classComment;
4749
}
50+
4851
@Override
4952
public String toString() {
5053
return "Javadoc{" +

0 commit comments

Comments
 (0)