Skip to content

Commit 1e25214

Browse files
Improve CborStreamReader Logic to Handle Nesting and End-of-Buffer Corner Cases (#3956)
1 parent 50bb85d commit 1e25214

File tree

2 files changed

+276
-27
lines changed

2 files changed

+276
-27
lines changed

extensions/src/AWSSDK.Extensions.CborProtocol/Internal/CborStreamReader.cs

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,6 @@ private void RefillBuffer(int bytesToSkip = 0)
106106
// Update the total size of valid data in our buffer.
107107
_currentChunkSize = leftoverBytesCount + bytesReadFromStream;
108108

109-
// Check for a malformed stream: if we have leftovers but the stream is empty,
110-
// it means the CBOR data was truncated.
111-
if (bytesReadFromStream == 0 && leftoverBytesCount > 0)
112-
{
113-
throw new CborContentException("Stream ended unexpectedly with an incomplete CBOR data item.");
114-
}
115-
116109
var newMemorySlice = new ReadOnlyMemory<byte>(_buffer, 0, _currentChunkSize);
117110
_internalCborReader.Reset(newMemorySlice);
118111

@@ -139,8 +132,16 @@ private T ExecuteRead<T>(Func<CborReader, T> readOperation)
139132
{
140133
return readOperation(_internalCborReader);
141134
}
142-
catch (CborContentException ex)
135+
catch (Exception ex) when (ex is InvalidOperationException || ex is CborContentException)
143136
{
137+
// Both InvalidOperationException and CborContentException can occur when the reader
138+
// runs out of buffered data mid-item or encounters an incomplete CBOR structure.
139+
// - CborContentException: typically thrown when encountering invalid CBOR content,
140+
// such as a premature break or truncated container.
141+
// - InvalidOperationException: can happen when the reader attempts to interpret data
142+
// as a different type due to hitting a buffer boundary before the full item is available.
143+
// We catch both, trigger a buffer refill, and retry before deciding if it is a genuine error.
144+
144145
if (_currentChunkSize == 0 && _internalCborReader.BytesRemaining == 0)
145146
{
146147
// Fail fast if we’ve already consumed all input and nothing remains to refill.
@@ -200,14 +201,32 @@ private void ReadEndContainer(CborContainerType expectedType, CborReaderState ex
200201

201202
if (state == CborReaderState.Finished)
202203
{
204+
// We got CborReaderState.Finished which means the reader has exhausted the bytes currently
205+
// given to it and the next token may live in the next chunk that we haven't read yet.
206+
//
207+
// For indefinite-length containers the end of the container is a break marker byte (0xFF).
208+
// If that break byte is the next byte in the stream, we must consume it, otherwise the reader
209+
// will become desynchronized (it will still think we're inside a container while removed the
210+
// container from the _nestingStack).
211+
// This byte can exist in the next chunk, so when we hit Finished we first attempt to refill
212+
// the buffer (no skip) so the reader can see the next byte(s). Only after refilling can we safely
213+
// determine whether the next byte is a break byte that terminates the current container.
214+
215+
// Try to refill first, maybe the break marker is in the next chunk.
216+
RefillBuffer(0);
217+
203218
if (IsNextByteEndOfContainer())
204219
{
220+
// If the next raw byte after refill is the CBOR "break" (0xFF), that indicates an
221+
// indefinite-length container terminator. We must explicitly consume that break byte
222+
// so we can remove the item from the _nestingStack and parsing can continue correctly.
223+
//
224+
// We call RefillBuffer(1) to skip the break byte and rebuild the internal reader.
225+
// It consumes the break byte from the leftover buffer so subsequent calls to the CborReader
226+
// see the next item.
227+
205228
RefillBuffer(1); // Skip the break marker (0xFF)
206229
}
207-
else
208-
{
209-
RefillBuffer(0); // This means we are in a definite-length map/array which doesn't end with 0xFF.
210-
}
211230
_nestingStack.Pop();
212231
return true;
213232
}
@@ -247,26 +266,42 @@ public CborReaderState PeekState()
247266
{
248267
return ExecuteRead(r =>
249268
{
250-
try
269+
// We need to Peek twice in case the first time failed because we are near the end of the current chunk and we just need to refill.
270+
for (int attempt = 0; attempt < 2; attempt++)
251271
{
252-
return r.PeekState();
272+
try
273+
{
274+
var state = r.PeekState();
275+
if (state == CborReaderState.Finished && _nestingStack.Count > 0)
276+
{
277+
_logger.DebugFormat("PeekState returned Finished, but nesting stack is not empty. Attempting refill.");
278+
RefillBuffer(0);
279+
continue;
280+
}
281+
282+
return state;
283+
}
284+
catch (CborContentException ex)
285+
{
286+
// PeekState threw an exception, we will attempt to refill in case we aren't at the end of the stream.
287+
_logger.Debug(ex, "PeekState threw exception (attempt #{0}). Attempting refill.", attempt + 1);
288+
RefillBuffer(0);
289+
}
253290
}
254-
catch (CborContentException ex)
291+
292+
// If PeekState still fails after refilling, and we're truly at the end of the stream,
293+
// only then consider inferring the state based on container nesting.
294+
if (_nestingStack.Count > 0)
255295
{
256-
// Translate a Break code to the appropriate container end state
257-
// based on our own nesting stack.
258-
if (_nestingStack.Count > 0)
259-
{
260-
var inferredState = _nestingStack.Peek() == CborContainerType.Map
261-
? CborReaderState.EndMap
262-
: CborReaderState.EndArray;
296+
var inferredState = _nestingStack.Peek() == CborContainerType.Map
297+
? CborReaderState.EndMap
298+
: CborReaderState.EndArray;
263299

264-
_logger.Debug(ex, "CborContentException during PeekState interpreted as {0} due to nesting stack.", inferredState);
265-
return inferredState;
266-
}
267-
// If our stack is empty, it's a genuine error.
268-
throw;
300+
_logger.DebugFormat("CborContentException during PeekState interpreted as {0} due to nesting stack.", inferredState);
301+
return inferredState;
269302
}
303+
304+
throw new CborContentException("Unable to determine CBOR reader state after retries, and no containers remain.");
270305
});
271306
}
272307

extensions/test/CborProtocol.Tests/CborStreamReaderTests.cs

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,5 +404,219 @@ public void Unmarshall_DeeplyNestedStructure()
404404
reader.ReadEndMap();
405405
}
406406
}
407+
408+
[Fact]
409+
public void PeekState_DoesNotAssumeEnd_WhenValueIsInNextChunk()
410+
{
411+
var writer = new CborWriter();
412+
writer.WriteStartMap(null);
413+
writer.WriteTextString("key");
414+
writer.WriteTextString(new string('X', 95)); // Forces a refill before the value is fully read
415+
writer.WriteEndMap();
416+
417+
byte[] bytes = writer.Encode();
418+
using var stream = new MemoryStream(bytes);
419+
using var reader = new CborStreamReader(stream);
420+
421+
reader.ReadStartMap();
422+
Assert.Equal("key", reader.ReadTextString());
423+
424+
// This should trigger a refill, not treat it as EndMap
425+
var value = reader.ReadTextString();
426+
Assert.Equal(95, value.Length);
427+
428+
reader.ReadEndMap();
429+
}
430+
431+
[Fact]
432+
public void Handles_BreakMarker_AtStartOfNextChunk()
433+
{
434+
var writer = new CborWriter();
435+
writer.WriteStartArray(null);
436+
writer.WriteTextString(new string('A', 95)); // Fills buffer almost completely
437+
writer.WriteEndArray(); // Writes 0xFF as break byte
438+
439+
var bytes = writer.Encode();
440+
using var stream = new MemoryStream(bytes);
441+
using var reader = new CborStreamReader(stream);
442+
443+
reader.ReadStartArray();
444+
string value = reader.ReadTextString();
445+
446+
// This will cause a refill where the 0xFF is the first byte of the new chunk
447+
reader.ReadEndArray();
448+
449+
Assert.Equal(CborReaderState.Finished, reader.PeekState());
450+
}
451+
452+
[Fact]
453+
public void Handles_MultipleBreakMarkers_AtEndOfStream()
454+
{
455+
var writer = new CborWriter();
456+
writer.WriteStartArray(null);
457+
writer.WriteStartArray(null);
458+
writer.WriteTextString("val");
459+
writer.WriteEndArray(); // Ends inner
460+
writer.WriteEndArray(); // Ends outer
461+
462+
byte[] bytes = writer.Encode();
463+
using var stream = new MemoryStream(bytes);
464+
using var reader = new CborStreamReader(stream);
465+
466+
reader.ReadStartArray(); // outer
467+
reader.ReadStartArray(); // inner
468+
Assert.Equal("val", reader.ReadTextString());
469+
470+
// Should skip 0xFF, refill, then skip another 0xFF
471+
reader.ReadEndArray();
472+
reader.ReadEndArray();
473+
474+
Assert.Equal(CborReaderState.Finished, reader.PeekState());
475+
}
476+
477+
[Fact]
478+
public void DoesNotInferEndMap_WhenContentFollows()
479+
{
480+
var writer = new CborWriter();
481+
writer.WriteStartMap(2);
482+
writer.WriteTextString("k1");
483+
writer.WriteTextString(new string('A', 80));
484+
writer.WriteTextString("k2");
485+
writer.WriteTextString(new string('B', 80));
486+
writer.WriteEndMap();
487+
488+
byte[] bytes = writer.Encode();
489+
using var stream = new MemoryStream(bytes);
490+
using var reader = new CborStreamReader(stream);
491+
492+
reader.ReadStartMap();
493+
Assert.Equal("k1", reader.ReadTextString());
494+
Assert.Equal(80, reader.ReadTextString().Length);
495+
496+
Assert.Equal("k2", reader.ReadTextString());
497+
498+
// This should not throw or prematurely infer EndMap
499+
Assert.Equal(80, reader.ReadTextString().Length);
500+
501+
reader.ReadEndMap();
502+
}
503+
504+
[Fact]
505+
public void Handles_DefinedLengthMap_ThatEndsExactlyAtEof()
506+
{
507+
var writer = new CborWriter();
508+
writer.WriteStartMap(1);
509+
writer.WriteTextString("key");
510+
writer.WriteTextString("value");
511+
writer.WriteEndMap(); // Definite-length, no break byte
512+
513+
var bytes = writer.Encode();
514+
using var stream = new MemoryStream(bytes);
515+
using var reader = new CborStreamReader(stream);
516+
517+
reader.ReadStartMap();
518+
Assert.Equal("key", reader.ReadTextString());
519+
Assert.Equal("value", reader.ReadTextString());
520+
reader.ReadEndMap();
521+
522+
Assert.Equal(CborReaderState.Finished, reader.PeekState());
523+
}
524+
525+
[Fact]
526+
public void Handles_NestedMapEndAtBufferBoundary_FollowedByOuterKey()
527+
{
528+
var writer = new CborWriter();
529+
writer.WriteStartMap(null); // Outer map
530+
writer.WriteTextString("outerKey1");
531+
writer.WriteStartMap(null); // Inner map
532+
writer.WriteTextString("innerKey");
533+
writer.WriteTextString("innerVal");
534+
writer.WriteEndMap(); // Ends inner map
535+
writer.WriteTextString("outerKey2");
536+
writer.WriteTextString("outerVal");
537+
writer.WriteEndMap(); // Ends outer map
538+
539+
byte[] bytes = writer.Encode();
540+
541+
using var stream = new MemoryStream(bytes);
542+
using var reader = new CborStreamReader(stream);
543+
544+
reader.ReadStartMap();
545+
Assert.Equal("outerKey1", reader.ReadTextString());
546+
reader.ReadStartMap();
547+
Assert.Equal("innerKey", reader.ReadTextString());
548+
Assert.Equal("innerVal", reader.ReadTextString());
549+
550+
// This ReadEndMap will bring us to the end of current buffer
551+
reader.ReadEndMap(); // This will trigger refill internally if needed
552+
553+
// Without a refill, the next ReadTextString() throws:
554+
// "No more CBOR data items to read in the current context."
555+
Assert.Equal("outerKey2", reader.ReadTextString());
556+
Assert.Equal("outerVal", reader.ReadTextString());
557+
reader.ReadEndMap();
558+
Assert.Equal(CborReaderState.Finished, reader.PeekState());
559+
}
560+
561+
[Fact]
562+
public void ReadTextString_TriggersRefill_After_ReadEndMap_AtBufferEnd()
563+
{
564+
var writer = new CborWriter();
565+
writer.WriteStartMap(null); // Outer map
566+
writer.WriteTextString("outerKey1");
567+
writer.WriteStartMap(null); // Inner map
568+
writer.WriteTextString("innerKey");
569+
writer.WriteTextString(new string('A', 30)); // Ensure we consume most of the buffer
570+
writer.WriteEndMap(); // End inner map — should still fit in buffer
571+
writer.WriteTextString("outerKey2"); // Starts in next refill
572+
writer.WriteTextString("outerVal");
573+
writer.WriteEndMap(); // End outer map
574+
575+
byte[] bytes = writer.Encode();
576+
var stream = new MemoryStream(bytes);
577+
578+
using var reader = new CborStreamReader(stream);
579+
580+
reader.ReadStartMap(); // outer map
581+
Assert.Equal("outerKey1", reader.ReadTextString());
582+
583+
reader.ReadStartMap(); // inner map
584+
Assert.Equal("innerKey", reader.ReadTextString());
585+
Assert.Equal(new string('A', 30), reader.ReadTextString());
586+
587+
reader.ReadEndMap(); // This must succeed without triggering refill
588+
589+
// This next read should trigger refill and continue parsing correctly
590+
Assert.Equal("outerKey2", reader.ReadTextString());
591+
Assert.Equal("outerVal", reader.ReadTextString());
592+
reader.ReadEndMap(); // outer map
593+
Assert.Equal(CborReaderState.Finished, reader.PeekState());
594+
}
595+
596+
[Fact]
597+
public void Unmarshall_MultipleNestedIndefiniteMapsEndingAtStreamEnd_ShouldSucceed()
598+
{
599+
var writer = new CborWriter();
600+
writer.WriteStartMap(null);
601+
writer.WriteTextString("nested");
602+
writer.WriteStartMap(null);
603+
writer.WriteTextString("deep");
604+
writer.WriteTextString("value");
605+
writer.WriteEndMap(); // emits 0xFF
606+
writer.WriteEndMap(); // emits 0xFF
607+
608+
byte[] bytes = writer.Encode();
609+
var stream = new MemoryStream(bytes);
610+
611+
using var reader = new CborStreamReader(stream);
612+
reader.ReadStartMap();
613+
Assert.Equal("nested", reader.ReadTextString());
614+
reader.ReadStartMap();
615+
Assert.Equal("deep", reader.ReadTextString());
616+
Assert.Equal("value", reader.ReadTextString());
617+
reader.ReadEndMap(); // should correctly handle 0xFF at end
618+
reader.ReadEndMap(); // should correctly handle 0xFF at end
619+
}
620+
407621
}
408622

0 commit comments

Comments
 (0)