Skip to content

Commit a630388

Browse files
authored
Merge pull request #824 from samyron/sm/java-vector-simd
Use Vector API in the Java Extension
2 parents f228b30 + d40b270 commit a630388

File tree

6 files changed

+163
-9
lines changed

6 files changed

+163
-9
lines changed

Rakefile

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ end
8686
JAVA_DIR = "java/src/json/ext"
8787
JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl"
8888
JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java"
89-
JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"]
89+
JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"].exclude("#{JAVA_DIR}/Vectorized*.java")
90+
JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/Vectorized*.java"]
9091
JAVA_CLASSES = []
9192
JRUBY_PARSER_JAR = File.expand_path("lib/json/ext/parser.jar")
9293
JRUBY_GENERATOR_JAR = File.expand_path("lib/json/ext/generator.jar")
@@ -142,8 +143,8 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
142143

143144
JRUBY_JAR = File.join(CONFIG["libdir"], "jruby.jar")
144145
if File.exist?(JRUBY_JAR)
146+
classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
145147
JAVA_SOURCES.each do |src|
146-
classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
147148
obj = src.sub(/\.java\Z/, '.class')
148149
file obj => src do
149150
if File.exist?(File.join(ENV['JAVA_HOME'], "lib", "modules"))
@@ -154,6 +155,20 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
154155
end
155156
JAVA_CLASSES << obj
156157
end
158+
159+
# Define one file task per Vector API source. The javac call is given a block,
# so a failed compile only prints a notice instead of aborting the build.
JAVA_VEC_SOURCES.each do |vec_src|
  vec_class = vec_src.sub(/\.java\Z/, '.class')
  file vec_class => vec_src do
    javac_cmd = ['javac', '--add-modules', 'jdk.incubator.vector', '-classpath', classpath, '--release', '16', vec_src]
    sh(*javac_cmd) do |ok, _status|
      puts(ok ? "*** 'jdk.incubator.vector' support enabled ***" : "*** 'jdk.incubator.vector' support disabled ***")
    end
  end
  JAVA_CLASSES << vec_class
end
157172
else
158173
warn "WARNING: Cannot find jruby in path => Cannot build jruby extension!"
159174
end
@@ -199,11 +214,13 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
199214
generator_classes = FileList[
200215
"json/ext/*ByteList*.class",
201216
"json/ext/OptionsReader*.class",
217+
"json/ext/EscapeScanner*.class",
202218
"json/ext/Generator*.class",
203219
"json/ext/RuntimeInfo*.class",
204220
"json/ext/*StringEncoder*.class",
205221
"json/ext/Utils*.class"
206222
]
223+
puts "Creating generator jar with classes: #{generator_classes.join(', ')}"
207224
sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes
208225
mv File.basename(JRUBY_GENERATOR_JAR), File.dirname(JRUBY_GENERATOR_JAR)
209226
end

java/src/json/ext/AbstractByteListDirectOutputStream.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ abstract class AbstractByteListDirectOutputStream extends OutputStream {
1515
static {
1616
String useSegmentedOutputStream = System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT);
1717
USE_SEGMENTED_BUFFER = Boolean.parseBoolean(useSegmentedOutputStream);
18-
// XXX Is there a logger we can use here?
19-
// System.out.println("Using segmented output stream: " + USE_SEGMENTED_BUFFER);
2018
}
2119

2220
public static AbstractByteListDirectOutputStream create(int estimatedSize) {

java/src/json/ext/SWARBasicStringEncoder.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ void encode(ByteList src) throws IOException {
7171
}
7272
}
7373

74-
private boolean skipChunk(long x) {
74+
boolean skipChunk(long x) {
7575
long is_ascii = 0x8080808080808080L & ~x;
7676
long xor2 = x ^ 0x0202020202020202L;
7777
long lt32_or_eq34 = xor2 - 0x2121212121212121L;
@@ -80,7 +80,7 @@ private boolean skipChunk(long x) {
8080
return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
8181
}
8282

83-
private boolean skipChunk(int x) {
83+
boolean skipChunk(int x) {
8484
int is_ascii = 0x80808080 & ~x;
8585
int xor2 = x ^ 0x02020202;
8686
int lt32_or_eq34 = xor2 - 0x21212121;

java/src/json/ext/StringEncoder.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import java.io.IOException;
99
import java.io.OutputStream;
10+
import java.lang.reflect.Constructor;
11+
import java.lang.reflect.InvocationTargetException;
1012
import java.nio.charset.StandardCharsets;
1113

1214
import org.jcodings.Encoding;
@@ -114,15 +116,37 @@ class StringEncoder extends ByteListTranscoder {
114116

115117
protected final byte[] escapeTable;
116118

119+
private static final String VECTORIZED_STRING_ENCODER_CLASS = "json.ext.VectorizedStringEncoder";
120+
private static final String USE_VECTORIZED_BASIC_ENCODER_PROP = "jruby.json.useVectorizedBasicEncoder";
121+
private static final String USE_VECTORIZED_BASIC_ENCODER_DEFAULT = "false";
122+
private static final boolean USE_VECTORIZED_BASIC_ENCODER;
123+
private static final StringEncoder VECTORIZED_SCANNER;
124+
117125
private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
118126
private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
119127
private static final boolean USE_BASIC_SWAR_ENCODER;
120128

121129
static {
130+
String enableVectorizedScanner = System.getProperty(USE_VECTORIZED_BASIC_ENCODER_PROP, USE_VECTORIZED_BASIC_ENCODER_DEFAULT);
131+
if ("true".equalsIgnoreCase(enableVectorizedScanner) || "1".equalsIgnoreCase(enableVectorizedScanner)) {
132+
StringEncoder scanner;
133+
try {
134+
Class<?> vectorizedStringEncoderClass = StringEncoder.class.getClassLoader().loadClass(VECTORIZED_STRING_ENCODER_CLASS);
135+
Constructor<?> vectorizedStringEncoderConstructor = vectorizedStringEncoderClass.getDeclaredConstructor();
136+
scanner = (StringEncoder) vectorizedStringEncoderConstructor.newInstance();
137+
} catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
138+
// Fallback to the StringEncoder if we cannot load the VectorizedStringEncoder.
139+
scanner = null;
140+
}
141+
VECTORIZED_SCANNER = scanner;
142+
USE_VECTORIZED_BASIC_ENCODER = scanner != null;
143+
} else {
144+
VECTORIZED_SCANNER = null;
145+
USE_VECTORIZED_BASIC_ENCODER = false;
146+
}
147+
122148
USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
123149
System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
124-
// XXX Is there a logger we can use here?
125-
// System.out.println("Using SWAR basic encoder: " + USE_BASIC_SWAR_ENCODER);
126150
}
127151

128152
OutputStream out;
@@ -149,8 +173,15 @@ class StringEncoder extends ByteListTranscoder {
149173
this.escapeTable = escapeTable;
150174
}
151175

176+
@Override
177+
public StringEncoder clone() {
178+
return new StringEncoder(escapeTable);
179+
}
180+
152181
static StringEncoder createBasicEncoder() {
153-
if (USE_BASIC_SWAR_ENCODER) {
182+
if (USE_VECTORIZED_BASIC_ENCODER) {
183+
return (StringEncoder) VECTORIZED_SCANNER.clone();
184+
} else if (USE_BASIC_SWAR_ENCODER) {
154185
return new SWARBasicStringEncoder();
155186
} else {
156187
return new StringEncoder(false);
java/src/json/ext/VectorizedStringEncoder.java

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
package json.ext;
2+
3+
import java.io.IOException;
4+
import java.nio.ByteBuffer;
5+
6+
import org.jruby.util.ByteList;
7+
8+
import jdk.incubator.vector.ByteVector;
9+
import jdk.incubator.vector.VectorMask;
10+
import jdk.incubator.vector.VectorOperators;
11+
import jdk.incubator.vector.VectorSpecies;
12+
13+
class VectorizedStringEncoder extends SWARBasicStringEncoder {
14+
private static final VectorSpecies<Byte> SP = ByteVector.SPECIES_PREFERRED;
15+
private static final ByteVector ZERO = ByteVector.zero(SP);
16+
private static final ByteVector TWO = ByteVector.broadcast(SP, 2);
17+
private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33);
18+
private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\');
19+
20+
@Override
21+
public StringEncoder clone() {
22+
return new VectorizedStringEncoder();
23+
}
24+
25+
@Override
26+
void encode(ByteList src) throws IOException {
27+
byte[] ptrBytes = src.unsafeBytes();
28+
int ptr = src.begin();
29+
int len = src.realSize();
30+
int beg = 0;
31+
int pos = ptr;
32+
33+
while ((pos + SP.length() <= len)) {
34+
ByteVector chunk = ByteVector.fromArray(SP, ptrBytes, ptr + pos);
35+
// bytes are signed in java, so we need to remove negative values
36+
VectorMask<Byte> negative = chunk.lt(ZERO);
37+
VectorMask<Byte> tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, TWO).lt(THIRTY_THREE).andNot(negative);
38+
VectorMask<Byte> needsEscape = chunk.eq(BACKSLASH).or(tooLowOrDblQuote);
39+
if (needsEscape.anyTrue()) {
40+
int chunkStart = pos;
41+
long mask = needsEscape.toLong();
42+
43+
while(mask > 0) {
44+
// nextMatch inlined
45+
int index = Long.numberOfTrailingZeros(mask);
46+
mask &= (mask - 1);
47+
pos = chunkStart + index;
48+
int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
49+
50+
beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
51+
escapeAscii(ch, aux, HEX);
52+
}
53+
54+
// Skip over any remaining characters in the current chunk
55+
pos = chunkStart + SP.length();
56+
continue;
57+
}
58+
59+
pos += SP.length();
60+
}
61+
62+
ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
63+
if (pos + 8 <= len) {
64+
long x = bb.getLong(ptr + pos);
65+
if (skipChunk(x)) {
66+
pos += 8;
67+
} else {
68+
int chunkEnd = ptr + pos + 8;
69+
while (pos < chunkEnd) {
70+
int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
71+
int ch_len = ESCAPE_TABLE[ch];
72+
if (ch_len > 0) {
73+
beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
74+
escapeAscii(ch, aux, HEX);
75+
} else {
76+
pos++;
77+
}
78+
}
79+
}
80+
}
81+
82+
if (pos + 4 <= len) {
83+
int x = bb.getInt(ptr + pos);
84+
if (skipChunk(x)) {
85+
pos += 4;
86+
}
87+
}
88+
89+
while (pos < len) {
90+
int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
91+
int ch_len = ESCAPE_TABLE[ch];
92+
if (ch_len > 0) {
93+
beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
94+
escapeAscii(ch, aux, HEX);
95+
} else {
96+
pos++;
97+
}
98+
}
99+
100+
if (beg < len) {
101+
append(ptrBytes, ptr + beg, len - beg);
102+
}
103+
}
104+
}

test/json/json_encoding_test.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ def test_generate_shared_string
3737
assert_equal '"234567890"', JSON.dump(s[2..-1])
3838
s = '01234567890123456789"a"b"c"d"e"f"g"h'
3939
assert_equal '"\"a\"b\"c\"d\"e\"f\"g\""', JSON.dump(s[20, 15])
40+
s = "0123456789001234567890012345678900123456789001234567890"
41+
assert_equal '"23456789001234567890012345678900123456789001234567890"', JSON.dump(s[2..-1])
42+
s = "0123456789001234567890012345678900123456789001234567890"
43+
assert_equal '"567890012345678900123456789001234567890012345678"', JSON.dump(s[5..-3])
4044
end
4145

4246
def test_unicode

0 commit comments

Comments
 (0)