Skip to content

Commit ed282d3

Browse files
committed Nov 6, 2024
Adding optional json output of results.
1 parent 164b921 commit ed282d3

File tree

8 files changed

+214
-27
lines changed

8 files changed

+214
-27
lines changed
 

‎.github/workflows/build.yaml

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: Build
2+
on: [push, pull_request, workflow_dispatch]
3+
jobs:
4+
build:
5+
runs-on: ubuntu-latest
6+
permissions:
7+
contents: read
8+
packages: write
9+
steps:
10+
- uses: actions/checkout@v3
11+
with:
12+
lfs: true
13+
- name: Cache Maven packages
14+
uses: actions/cache@v1
15+
with:
16+
path: ~/.m2
17+
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
18+
restore-keys: ${{ runner.os }}-m2
19+
- name: Set up JDK 21
20+
uses: actions/setup-java@v3
21+
with:
22+
java-version: 21
23+
distribution: adopt
24+
architecture: x64
25+
- name: Build
26+
run: mvn --batch-mode test

‎README.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ This command-line utility runs a series of single-threaded workloads using [Phil
55
to redact PII tokens in strings of varying sizes. Workloads can be run multiple times to warm up the JVM or test long-term use.
66
Workloads run for a fixed amount of time rather than a fixed number of iterations.
77

8-
[![CodeFactor](https://www.codefactor.io/repository/github/resurfaceio/phileas-benchmark/badge)](https://www.codefactor.io/repository/github/resurfaceio/phileas-benchmark)
8+
[![CodeFactor](https://www.codefactor.io/repository/github/philterd/phileas-benchmark/badge)](https://www.codefactor.io/repository/github/philterd/phileas-benchmark)
99

1010
## Dependencies
1111

12-
* Java 22
12+
* Java 21
1313
* Maven 3.9.x
1414
* [philterd/phileas](https://github.com/philterd/phileas)
1515

@@ -25,6 +25,12 @@ java -server -Xmx512M -XX:+AlwaysPreTouch -XX:PerBytecodeRecompilationCutoff=100
2525
java -server -Xmx512M -XX:+AlwaysPreTouch -XX:PerBytecodeRecompilationCutoff=10000 -XX:PerMethodRecompilationCutoff=10000 -jar target/phileas-benchmark-cmd.jar gettysburg_address mask_credit_cards 1 1000
2626
```
2727

28+
To get the results back as JSON (an array containing one result object per document per repetition), append a `json` argument to the command:
29+
30+
```
31+
java -server -Xmx512M -XX:+AlwaysPreTouch -XX:PerBytecodeRecompilationCutoff=10000 -XX:PerMethodRecompilationCutoff=10000 -jar target/phileas-benchmark-cmd.jar all mask_all 1 15000 json
32+
```
33+
2834
### Available documents
2935

3036
* hello_world (11 chars)

‎pom.xml

+7-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
<artifactId>maven-compiler-plugin</artifactId>
1919
<version>3.13.0</version>
2020
<configuration>
21-
<release>22</release>
21+
<release>21</release>
2222
<compilerArgument>-proc:none</compilerArgument>
2323
</configuration>
2424
</plugin>
@@ -55,6 +55,12 @@
5555
<version>2.7.0-SNAPSHOT</version>
5656
</dependency>
5757

58+
<dependency>
59+
<groupId>com.google.code.gson</groupId>
60+
<artifactId>gson</artifactId>
61+
<version>2.11.0</version>
62+
</dependency>
63+
5864
<!-- for testing -->
5965
<dependency>
6066
<groupId>com.mscharhag.oleaster</groupId>

‎src/main/java/ai/philterd/phileas/benchmark/Documents.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public class Documents {
2828
// todo add JSON-encoded documents
2929
// todo add documents with PCI matches
3030

31-
public static final String GETTYSBERG_ADDRESS = """
31+
public static final String GETTYSBURG_ADDRESS = """
3232
Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.
3333
Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
3434
But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth.
@@ -82,13 +82,13 @@ public class Documents {
8282

8383
public static final List<String> keys = List.of(
8484
"hello_world",
85-
"gettysberg_address",
85+
"gettysburg_address",
8686
"i_have_a_dream"
8787
);
8888

8989
public static final Map<String, String> map = Map.ofEntries(
9090
new AbstractMap.SimpleEntry<>("hello_world", "Hello world"),
91-
new AbstractMap.SimpleEntry<>("gettysberg_address", GETTYSBERG_ADDRESS),
91+
new AbstractMap.SimpleEntry<>("gettysburg_address", GETTYSBURG_ADDRESS),
9292
new AbstractMap.SimpleEntry<>("i_have_a_dream", I_HAVE_A_DREAM)
9393
);
9494

‎src/main/java/ai/philterd/phileas/benchmark/Main.java

+70-18
Original file line numberDiff line numberDiff line change
@@ -16,54 +16,106 @@
1616

1717
package ai.philterd.phileas.benchmark;
1818

19+
import com.google.gson.Gson;
20+
21+
import java.util.HashMap;
22+
import java.util.LinkedList;
1923
import java.util.List;
24+
import java.util.Map;
2025

2126
/**
2227
* Run benchmark workloads for Phileas PII engine.
2328
*/
2429
public class Main {
2530

2631
public static void main(String[] args) throws Exception {
32+
2733
// show usage statement if needed
28-
if (args.length != 4) {
29-
System.out.println("Usage: java ai.philterd.phileas.benchmark.Main <document> <redactor> <repetitions> <workload_millis>");
34+
if (args.length != 4 && args.length != 5) {
35+
System.out.println("Usage: java ai.philterd.phileas.benchmark.Main <document> <redactor> <repetitions> <workload_millis> <output_format>");
3036
throw new IllegalArgumentException("Invalid arguments");
3137
}
3238

3339
// read arguments
34-
String arg_document = args[0];
35-
String arg_redactor = args[1];
36-
int repetitions = Integer.parseInt(args[2]);
37-
int workload_millis = Integer.parseInt(args[3]);
40+
final String arg_document = args[0];
41+
final String arg_redactor = args[1];
42+
final int repetitions = Integer.parseInt(args[2]);
43+
final int workload_millis = Integer.parseInt(args[3]);
44+
45+
String arg_format = "sysout";
46+
if(args.length == 5) {
47+
arg_format = args[4];
48+
}
3849

3950
// create redactor based on Phileas PII engine
40-
Redactor redactor = new Redactor(arg_redactor);
51+
final Redactor redactor = new Redactor(arg_redactor);
52+
53+
final List<Result> results = new LinkedList<>();
4154

4255
// repeatedly redact documents and print results
43-
List<String> documents = "all".equals(arg_document) ? Documents.keys : List.of(arg_document);
44-
int[] value_lengths = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 3072, 4096};
56+
final List<String> documents = "all".equals(arg_document) ? Documents.keys : List.of(arg_document);
57+
final int[] value_lengths = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 3072, 4096};
58+
4559
for (int i = 0; i < repetitions; i++) {
46-
for (String document : documents) {
47-
try {
60+
61+
for (final String document : documents) {
62+
63+
if (!arg_format.equals("json")) {
4864
System.out.println("\n------------------------------------------------------------------------------------------");
4965
System.out.println("Using document: " + document);
5066
System.out.println("Using redactor: " + arg_redactor);
5167
System.out.println("Using workload_millis: " + workload_millis);
5268
System.out.println("\nstring_length,calls_per_sec");
53-
for (int value_length : value_lengths) run_workload(workload_millis, redactor, Documents.get(document).substring(0, value_length));
54-
} catch (StringIndexOutOfBoundsException e) {
55-
// do nothing, ignore
5669
}
70+
71+
final Map<Integer, Long> calls = new HashMap<>();
72+
73+
for (int value_length : value_lengths) {
74+
75+
if(Documents.get(document).length() >= value_length) {
76+
77+
final String value = Documents.get(document).substring(0, value_length);
78+
final long calls_per_sec = run_workload(workload_millis, redactor, value);
79+
80+
if (!arg_format.equals("json")) {
81+
System.out.println(value.length() + "," + calls_per_sec);
82+
}
83+
84+
calls.put(value_length, calls_per_sec);
85+
86+
} else {
87+
break;
88+
}
89+
90+
}
91+
92+
final Result result = new Result();
93+
result.setWorkloadMillis(workload_millis);
94+
result.setRedactor(arg_redactor);
95+
result.setDocument(document);
96+
result.setCallsPerSecond(calls);
97+
98+
results.add(result);
99+
57100
}
101+
58102
}
103+
104+
if(arg_format.equals("json")) {
105+
final Gson gson = new Gson();
106+
System.out.println(gson.toJson(results));
107+
}
108+
59109
}
60110

61-
private static void run_workload(int millis, Redactor redactor, String value) throws Exception {
62-
long start = System.currentTimeMillis();
111+
private static long run_workload(int millis, Redactor redactor, String value) throws Exception {
112+
113+
final long start = System.currentTimeMillis();
63114
long calls = -1;
64115
while ((++calls % 100 != 0) || (System.currentTimeMillis() - start < millis)) redactor.filter(value);
65-
long calls_per_sec = calls * 1000 / (System.currentTimeMillis() - start);
66-
System.out.println(value.length() + "," + calls_per_sec);
116+
117+
return calls * 1000 / (System.currentTimeMillis() - start);
118+
67119
}
68120

69121
}

‎src/main/java/ai/philterd/phileas/benchmark/Redactor.java

+24-3
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,32 @@
1919
import ai.philterd.phileas.model.configuration.PhileasConfiguration;
2020
import ai.philterd.phileas.model.enums.MimeType;
2121
import ai.philterd.phileas.model.policy.Identifiers;
22-
import ai.philterd.phileas.model.policy.IgnoredPattern;
2322
import ai.philterd.phileas.model.policy.Policy;
24-
import ai.philterd.phileas.model.policy.filters.*;
23+
import ai.philterd.phileas.model.policy.filters.BankRoutingNumber;
24+
import ai.philterd.phileas.model.policy.filters.BitcoinAddress;
25+
import ai.philterd.phileas.model.policy.filters.CreditCard;
26+
import ai.philterd.phileas.model.policy.filters.DriversLicense;
27+
import ai.philterd.phileas.model.policy.filters.EmailAddress;
28+
import ai.philterd.phileas.model.policy.filters.IbanCode;
29+
import ai.philterd.phileas.model.policy.filters.IpAddress;
30+
import ai.philterd.phileas.model.policy.filters.PassportNumber;
31+
import ai.philterd.phileas.model.policy.filters.PhoneNumber;
32+
import ai.philterd.phileas.model.policy.filters.Ssn;
33+
import ai.philterd.phileas.model.policy.filters.TrackingNumber;
34+
import ai.philterd.phileas.model.policy.filters.Vin;
2535
import ai.philterd.phileas.model.policy.filters.strategies.AbstractFilterStrategy;
26-
import ai.philterd.phileas.model.policy.filters.strategies.rules.*;
36+
import ai.philterd.phileas.model.policy.filters.strategies.rules.BankRoutingNumberFilterStrategy;
37+
import ai.philterd.phileas.model.policy.filters.strategies.rules.BitcoinAddressFilterStrategy;
38+
import ai.philterd.phileas.model.policy.filters.strategies.rules.CreditCardFilterStrategy;
39+
import ai.philterd.phileas.model.policy.filters.strategies.rules.DriversLicenseFilterStrategy;
40+
import ai.philterd.phileas.model.policy.filters.strategies.rules.EmailAddressFilterStrategy;
41+
import ai.philterd.phileas.model.policy.filters.strategies.rules.IbanCodeFilterStrategy;
42+
import ai.philterd.phileas.model.policy.filters.strategies.rules.IpAddressFilterStrategy;
43+
import ai.philterd.phileas.model.policy.filters.strategies.rules.PassportNumberFilterStrategy;
44+
import ai.philterd.phileas.model.policy.filters.strategies.rules.PhoneNumberFilterStrategy;
45+
import ai.philterd.phileas.model.policy.filters.strategies.rules.SsnFilterStrategy;
46+
import ai.philterd.phileas.model.policy.filters.strategies.rules.TrackingNumberFilterStrategy;
47+
import ai.philterd.phileas.model.policy.filters.strategies.rules.VinFilterStrategy;
2748
import ai.philterd.phileas.model.responses.FilterResponse;
2849
import ai.philterd.phileas.services.PhileasFilterService;
2950

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright 2024 Philterd, LLC @ https://www.philterd.ai
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ai.philterd.phileas.benchmark;
18+
19+
import com.google.gson.annotations.SerializedName;
20+
21+
import java.util.HashMap;
22+
import java.util.Map;
23+
24+
/**
 * Benchmark outcome for one pass over a single document, as collected by {@code Main}.
 *
 * <p>This is a plain mutable bean. {@code Main} fills one instance per document per
 * repetition and, when the {@code json} output format is requested, serializes the
 * collected list with Gson.
 */
public class Result {
25+
26+
// Key of the benchmarked document, as set by Main (e.g. "hello_world").
private String document;
27+
// Redactor name that Main received on the command line.
private String redactor;
28+
29+
// Workload duration setting in milliseconds; written to JSON as "workload_millis".
@SerializedName("workload_millis")
30+
private long workloadMillis;
31+
32+
// Measured calls per second; Main keys this map by the input string length.
// Written to JSON as "calls_per_sec".
@SerializedName("calls_per_sec")
33+
private Map<Integer, Long> callsPerSecond;
34+
35+
/** Creates a result whose calls-per-second map starts out as an empty {@link HashMap}. */
public Result() {
36+
this.callsPerSecond = new HashMap<>();
37+
}
38+
39+
/** @return the document key stored in this result */
public String getDocument() {
40+
return document;
41+
}
42+
43+
/** @param document the document key to store */
public void setDocument(String document) {
44+
this.document = document;
45+
}
46+
47+
/** @return the redactor name stored in this result */
public String getRedactor() {
48+
return redactor;
49+
}
50+
51+
/** @param redactor the redactor name to store */
public void setRedactor(String redactor) {
52+
this.redactor = redactor;
53+
}
54+
55+
/** @return the workload duration setting, in milliseconds */
public long getWorkloadMillis() {
56+
return workloadMillis;
57+
}
58+
59+
/** @param workloadMillis the workload duration setting, in milliseconds */
public void setWorkloadMillis(long workloadMillis) {
60+
this.workloadMillis = workloadMillis;
61+
}
62+
63+
/** @return the stored calls-per-second map itself (not a copy) */
public Map<Integer, Long> getCallsPerSecond() {
64+
return callsPerSecond;
65+
}
66+
67+
/**
 * Replaces the calls-per-second map. The given reference is stored as-is, with no
 * defensive copy.
 *
 * @param callsPerSecond the map to store
 */
public void setCallsPerSecond(Map<Integer, Long> callsPerSecond) {
68+
this.callsPerSecond = callsPerSecond;
69+
}
70+
71+
}

‎src/main/resources/log4j2.properties

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
rootLogger=OFF, STDOUT
2+
appender.console.type = Console
3+
appender.console.name = STDOUT
4+
appender.console.layout.type = PatternLayout
5+
appender.console.layout.pattern = [%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %c{1} - %msg%n

0 commit comments

Comments
 (0)
Please sign in to comment.