From 5d5163181f601227a1c9a352b93c7a044c251808 Mon Sep 17 00:00:00 2001 From: Nabil Fawwaz Elqayyim Date: Thu, 21 Aug 2025 21:13:08 +0700 Subject: [PATCH 1/2] Optimize WebFlux multipart upload performance Improve AbstractNestedMatcher by using a thread-local buffer and chunked scanning to reduce allocations and speed up multipart boundary detection. Closes gh-34651 Signed-off-by: Nabil Fawwaz Elqayyim --- .../core/io/buffer/DataBufferUtils.java | 105 ++++++++++++++++-- 1 file changed, 98 insertions(+), 7 deletions(-) diff --git a/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java b/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java index 4c3c430e9545..493c268ed511 100644 --- a/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java +++ b/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java @@ -859,7 +859,26 @@ public void reset() { /** - * Base class for a {@link NestedMatcher}. + * An abstract base implementation of {@link NestedMatcher} that looks for + * a specific delimiter in a {@link DataBuffer}. + *

+ * Uses a thread-local buffer to scan data in chunks, reducing memory + * allocations and improving performance when processing large buffers. + *

+ * + *

+ * Each matcher keeps its own match state, so it is intended for + * single-threaded use. The thread-local buffer ensures that multiple + * threads can run their own matchers independently without interfering. + *

+ * + *

+ * Subclasses can extend this class to add custom matching behavior while + * reusing the built-in delimiter tracking and scanning logic. + *

+ * + * @see NestedMatcher + * @see DataBuffer */ private abstract static class AbstractNestedMatcher implements NestedMatcher { @@ -867,6 +886,8 @@ private abstract static class AbstractNestedMatcher implements NestedMatcher { private int matches = 0; + // Thread-local chunk buffer to avoid per-call allocations + private static final ThreadLocal LOCAL_BUFFER = ThreadLocal.withInitial(() -> new byte[8 * 1024]); protected AbstractNestedMatcher(byte[] delimiter) { this.delimiter = delimiter; @@ -880,16 +901,86 @@ protected int getMatches() { return this.matches; } + protected static void releaseLocalBuffer() { + LOCAL_BUFFER.remove(); + } + @Override public int match(DataBuffer dataBuffer) { - for (int pos = dataBuffer.readPosition(); pos < dataBuffer.writePosition(); pos++) { - byte b = dataBuffer.getByte(pos); - if (match(b)) { + final int readPos = dataBuffer.readPosition(); + final int writePos = dataBuffer.writePosition(); + final int length = writePos - readPos; + + final byte[] delimiter0 = this.delimiter; + final int delimiterLen = delimiter0.length; + final byte delimiter1 = delimiter0[0]; + + int matchIndex = this.matches; + + final byte[] chunk = LOCAL_BUFFER.get(); + final int chunkSize = Math.min(chunk.length, length); + + try { + for (int offset = 0; offset < length; offset += chunkSize) { + int currentChunkSize = Math.min(chunkSize, length - offset); + + dataBuffer.readPosition(readPos + offset); + dataBuffer.read(chunk, 0, currentChunkSize); + + matchIndex = processChunk(chunk, currentChunkSize, delimiter0, delimiterLen, delimiter1, matchIndex, readPos, offset); + if (matchIndex < 0) { + return -(matchIndex + 1); // found, returning actual position + } + } + + this.matches = matchIndex; + return -1; + } + finally { + dataBuffer.readPosition(readPos); // restore original position + releaseLocalBuffer(); + } + } + + private int processChunk(byte[] chunk, int currentChunkSize, byte[] delimiter0, int delimiterLen, byte delimiter1, int matchIndex, int readPos, int offset) { + int i = 0; + while (i < currentChunkSize) { + if (matchIndex == 0) { + i = findNextCandidate(chunk, i, currentChunkSize, delimiter1); + if (i >= currentChunkSize) { + return matchIndex; // no candidate in this chunk + } + } + + matchIndex = updateMatchIndex(chunk[i], delimiter0, delimiterLen, delimiter1, matchIndex); + if (matchIndex == -1) { + return -(readPos + offset + i + 1); // return found delimiter position (encoded as negative) + } + i++; + } + return matchIndex; + } + + private int findNextCandidate(byte[] chunk, int start, int limit, byte delimiter1) { + int j = start; + while (j < limit && chunk[j] != delimiter1) { + j++; + } + return j; + } + + private int updateMatchIndex(byte b, byte[] delimiter0, int delimiterLen, byte delimiter1, int matchIndex) { + if (b == delimiter0[matchIndex]) { + matchIndex++; + if (matchIndex == delimiterLen) { reset(); - return pos; + return -1; } } - return -1; + else { + matchIndex = (b == delimiter1) ? 1 : 0; + } + return matchIndex; } @Override @@ -1026,7 +1117,7 @@ private static class ReadCompletionHandler implements CompletionHandler state = new AtomicReference<>(State.IDLE); public ReadCompletionHandler(AsynchronousFileChannel channel, - FluxSink sink, long position, DataBufferFactory dataBufferFactory, int bufferSize) { + FluxSink sink, long position, DataBufferFactory dataBufferFactory, int bufferSize) { this.channel = channel; this.sink = sink; From 1bf232d1adea495d4f5438caf74dc805f14774b4 Mon Sep 17 00:00:00 2001 From: Nabil Fawwaz Elqayyim Date: Fri, 22 Aug 2025 21:06:31 +0700 Subject: [PATCH 2/2] Refactor AbstractNestedMatcher to use per-instance buffer - Replace ThreadLocal buffer with a per-instance reusable buffer - Improves memory locality and reduces ThreadLocal overhead - Update Javadoc for clarity, performance notes, and subclassing guidance Closes gh-34651 Signed-off-by: Nabil Fawwaz Elqayyim --- .../core/io/buffer/DataBufferUtils.java | 63 ++++++++----------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java b/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java index 493c268ed511..4079e4d9a3cc 100644 --- a/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java +++ b/spring-core/src/main/java/org/springframework/core/io/buffer/DataBufferUtils.java @@ -61,6 +61,7 @@ * * @author Arjen Poutsma * @author Brian Clozel + * @author Nabil Fawwaz Elqayyim * @since 5.0 */ public abstract class DataBufferUtils { @@ -859,23 +860,17 @@ public void reset() { /** - * An abstract base implementation of {@link NestedMatcher} that looks for - * a specific delimiter in a {@link DataBuffer}. - *

- * Uses a thread-local buffer to scan data in chunks, reducing memory - * allocations and improving performance when processing large buffers. - *

+ * Base {@link NestedMatcher} implementation that scans a {@link DataBuffer} + * for a specific delimiter. * - *

- * Each matcher keeps its own match state, so it is intended for - * single-threaded use. The thread-local buffer ensures that multiple - * threads can run their own matchers independently without interfering. - *

+ *

Relies on a per-instance reusable buffer to scan data in chunks, + * minimizing allocations and improving performance for large or streaming data.

* - *

- * Subclasses can extend this class to add custom matching behavior while - * reusing the built-in delimiter tracking and scanning logic. - *

+ *

Each matcher maintains its own state and buffer, ensuring safe use + * in reactive pipelines where execution may shift across threads.

+ * + *

Subclasses may extend this class to customize matching strategies + * while reusing the built-in delimiter tracking and scanning logic.

* * @see NestedMatcher * @see DataBuffer @@ -886,8 +881,7 @@ private abstract static class AbstractNestedMatcher implements NestedMatcher { private int matches = 0; - // Thread-local chunk buffer to avoid per-call allocations - private static final ThreadLocal LOCAL_BUFFER = ThreadLocal.withInitial(() -> new byte[8 * 1024]); + private final byte[] localBuffer = new byte[8 * 1024]; // Reusable buffer per matcher instance protected AbstractNestedMatcher(byte[] delimiter) { this.delimiter = delimiter; @@ -901,25 +895,21 @@ protected int getMatches() { return this.matches; } - protected static void releaseLocalBuffer() { - LOCAL_BUFFER.remove(); - } - @Override public int match(DataBuffer dataBuffer) { final int readPos = dataBuffer.readPosition(); final int writePos = dataBuffer.writePosition(); final int length = writePos - readPos; - final byte[] delimiter0 = this.delimiter; - final int delimiterLen = delimiter0.length; - final byte delimiter1 = delimiter0[0]; + final byte[] delimiterBytes = this.delimiter; + final int delimiterLength = delimiterBytes.length; + final byte delimiterFirstByte = delimiterBytes[0]; - int matchIndex = this.matches; - - final byte[] chunk = LOCAL_BUFFER.get(); + final byte[] chunk = localBuffer; final int chunkSize = Math.min(chunk.length, length); + int matchIndex = this.matches; + try { for (int offset = 0; offset < length; offset += chunkSize) { int currentChunkSize = Math.min(chunkSize, length - offset); @@ -927,7 +917,7 @@ public int match(DataBuffer dataBuffer) { dataBuffer.readPosition(readPos + offset); dataBuffer.read(chunk, 0, currentChunkSize); - matchIndex = processChunk(chunk, currentChunkSize, delimiter0, delimiterLen, delimiter1, matchIndex, readPos, offset); + matchIndex = processChunk(chunk, currentChunkSize, delimiterBytes, delimiterLength, delimiterFirstByte, matchIndex, readPos, offset); if (matchIndex < 0) { return -(matchIndex + 1); // found, returning actual position } @@ -938,21 +928,20 @@ public int match(DataBuffer dataBuffer) { } finally { dataBuffer.readPosition(readPos); // restore original position - releaseLocalBuffer(); } } - private int processChunk(byte[] chunk, int currentChunkSize, byte[] delimiter0, int delimiterLen, byte delimiter1, int matchIndex, int readPos, int offset) { + private int processChunk(byte[] chunk, int currentChunkSize, byte[] delimiterBytes, int delimiterLen, byte delimiterFirstByte, int matchIndex, int readPos, int offset) { int i = 0; while (i < currentChunkSize) { if (matchIndex == 0) { - i = findNextCandidate(chunk, i, currentChunkSize, delimiter1); + i = findNextCandidate(chunk, i, currentChunkSize, delimiterFirstByte); if (i >= currentChunkSize) { return matchIndex; // no candidate in this chunk } } - matchIndex = updateMatchIndex(chunk[i], delimiter0, delimiterLen, delimiter1, matchIndex); + matchIndex = updateMatchIndex(chunk[i], delimiterBytes, delimiterLen, delimiterFirstByte, matchIndex); if (matchIndex == -1) { return -(readPos + offset + i + 1); // return found delimiter position (encoded as negative) } @@ -961,16 +950,16 @@ private int processChunk(byte[] chunk, int currentChunkSize, byte[] delimiter0, return matchIndex; } - private int findNextCandidate(byte[] chunk, int start, int limit, byte delimiter1) { + private int findNextCandidate(byte[] chunk, int start, int limit, byte delimiterFirstByte) { int j = start; - while (j < limit && chunk[j] != delimiter1) { + while (j < limit && chunk[j] != delimiterFirstByte) { j++; } return j; } - private int updateMatchIndex(byte b, byte[] delimiter0, int delimiterLen, byte delimiter1, int matchIndex) { - if (b == delimiter0[matchIndex]) { + private int updateMatchIndex(byte b, byte[] delimiterBytes, int delimiterLen, byte delimiterFirstByte, int matchIndex) { + if (b == delimiterBytes[matchIndex]) { matchIndex++; if (matchIndex == delimiterLen) { reset(); @@ -978,7 +967,7 @@ private int updateMatchIndex(byte b, byte[] delimiter0, int delimiterLen, byte d } } else { - matchIndex = (b == delimiter1) ? 1 : 0; + matchIndex = (b == delimiterFirstByte) ? 1 : 0; } return matchIndex; }