@@ -81,6 +81,7 @@ public struct AsyncBufferSequence: AsyncSequence, Sendable {
81
81
return Iterator ( diskIO: self . diskIO)
82
82
}
83
83
84
+ // [New API: 0.0.1]
84
85
public func lines< Encoding: _UnicodeEncoding > (
85
86
encoding: Encoding . Type = UTF8 . self,
86
87
bufferingPolicy: LineSequence < Encoding > . BufferingPolicy = . unbounded
@@ -91,6 +92,7 @@ public struct AsyncBufferSequence: AsyncSequence, Sendable {
91
92
92
93
// MARK: - LineSequence
93
94
extension AsyncBufferSequence {
95
+ // [New API: 0.0.1]
94
96
public struct LineSequence < Encoding: _UnicodeEncoding > : AsyncSequence , Sendable {
95
97
public typealias Element = String
96
98
@@ -140,25 +142,20 @@ extension AsyncBufferSequence {
140
142
}
141
143
#else
142
144
// Unfortunately here we _have to_ copy the bytes out because
143
- // DisptachIO (rightfully) reuses buffer, which means `buffer.data`
145
+ // DispatchIO (rightfully) reuses buffer, which means `buffer.data`
144
146
// has the same address on all iterations, therefore we can't directly
145
147
// create the result array from buffer.data
146
- let temporary = UnsafeMutableBufferPointer< Encoding . CodeUnit> . allocate(
147
- capacity: buffer. data. count
148
- )
149
- defer { temporary. deallocate ( ) }
150
- let actualBytesCopied = buffer. data. copyBytes (
151
- to: temporary,
152
- count: buffer. data. count
153
- )
154
148
155
149
// Calculate how many CodeUnit elements we have
156
- let elementCount = actualBytesCopied / MemoryLayout< Encoding . CodeUnit> . stride
150
+ let elementCount = buffer . data . count / MemoryLayout< Encoding . CodeUnit> . stride
157
151
158
152
// Create array by copying from the buffer reinterpreted as CodeUnit
159
- let result : Array < Encoding . CodeUnit > = Array (
160
- UnsafeBufferPointer ( start: temporary. baseAddress, count: elementCount)
161
- )
153
+ let result : Array < Encoding . CodeUnit > = buffer. data. withUnsafeBytes { ptr -> Array < Encoding . CodeUnit > in
154
+ return Array (
155
+ UnsafeBufferPointer ( start: ptr. baseAddress? . assumingMemoryBound ( to: Encoding . CodeUnit. self) , count: elementCount)
156
+ )
157
+ }
158
+
162
159
#endif
163
160
return result. isEmpty ? nil : result
164
161
}
@@ -180,18 +177,38 @@ extension AsyncBufferSequence {
180
177
/// let formFeed = Encoding.CodeUnit(0x0C)
181
178
let carriageReturn = Encoding . CodeUnit ( 0x0D )
182
179
// carriageReturn + lineFeed
183
- let newLine : Encoding . CodeUnit
184
- let lineSeparator : Encoding . CodeUnit
185
- let paragraphSeparator : Encoding . CodeUnit
180
+ let newLine1 : Encoding . CodeUnit
181
+ let newLine2 : Encoding . CodeUnit
182
+ let lineSeparator1 : Encoding . CodeUnit
183
+ let lineSeparator2 : Encoding . CodeUnit
184
+ let lineSeparator3 : Encoding . CodeUnit
185
+ let paragraphSeparator1 : Encoding . CodeUnit
186
+ let paragraphSeparator2 : Encoding . CodeUnit
187
+ let paragraphSeparator3 : Encoding . CodeUnit
186
188
switch Encoding . CodeUnit. self {
187
189
case is UInt8 . Type :
188
- newLine = Encoding . CodeUnit ( 0xC2 ) // 0xC2 0x85
189
- lineSeparator = Encoding . CodeUnit ( 0xE2 ) // 0xE2 0x80 0xA8
190
- paragraphSeparator = Encoding . CodeUnit ( 0xE2 ) // 0xE2 0x80 0xA9
190
+ newLine1 = Encoding . CodeUnit ( 0xC2 )
191
+ newLine2 = Encoding . CodeUnit ( 0x85 )
192
+
193
+ lineSeparator1 = Encoding . CodeUnit ( 0xE2 )
194
+ lineSeparator2 = Encoding . CodeUnit ( 0x80 )
195
+ lineSeparator3 = Encoding . CodeUnit ( 0xA8 )
196
+
197
+ paragraphSeparator1 = Encoding . CodeUnit ( 0xE2 )
198
+ paragraphSeparator2 = Encoding . CodeUnit ( 0x80 )
199
+ paragraphSeparator3 = Encoding . CodeUnit ( 0xA9 )
191
200
case is UInt16 . Type , is UInt32 . Type :
192
- newLine = Encoding . CodeUnit ( 0x0085 )
193
- lineSeparator = Encoding . CodeUnit ( 0x2028 )
194
- paragraphSeparator = Encoding . CodeUnit ( 0x2029 )
201
+ // UTF16 and UTF32 encode each of these separators as a single code unit
202
+ newLine1 = Encoding . CodeUnit ( 0x0085 )
203
+ newLine2 = Encoding . CodeUnit ( 0x0085 )
204
+
205
+ lineSeparator1 = Encoding . CodeUnit ( 0x2028 )
206
+ lineSeparator2 = Encoding . CodeUnit ( 0x2028 )
207
+ lineSeparator3 = Encoding . CodeUnit ( 0x2028 )
208
+
209
+ paragraphSeparator1 = Encoding . CodeUnit ( 0x2029 )
210
+ paragraphSeparator2 = Encoding . CodeUnit ( 0x2029 )
211
+ paragraphSeparator3 = Encoding . CodeUnit ( 0x2029 )
195
212
default :
196
213
fatalError ( " Unknown encoding type \( Encoding . self) " )
197
214
}
@@ -210,10 +227,13 @@ extension AsyncBufferSequence {
210
227
var currentIndex : Int = self . startIndex
211
228
for index in self . startIndex ..< self . buffer. count {
212
229
currentIndex = index
213
- // Early return if we exceed max line length
230
+ // Throw if we exceed max line length
214
231
if case . maxLineLength( let maxLength) = self . bufferingPolicy,
215
232
currentIndex >= maxLength {
216
- return yield ( at: currentIndex)
233
+ throw SubprocessError (
234
+ code: . init( . streamOutputExceedsLimit( maxLength) ) ,
235
+ underlyingError: nil
236
+ )
217
237
}
218
238
let byte = self . buffer [ currentIndex]
219
239
switch byte {
@@ -232,12 +252,12 @@ extension AsyncBufferSequence {
232
252
continue
233
253
}
234
254
return result
235
- case newLine :
255
+ case newLine1 :
236
256
var targetIndex = currentIndex
237
257
if Encoding . CodeUnit. self is UInt8 . Type {
238
258
// For UTF8, look for the next 0x85 byte
239
259
guard ( targetIndex + 1 ) < self . buffer. count,
240
- self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0x85 ) else {
260
+ self . buffer [ targetIndex + 1 ] == newLine2 else {
241
261
// Not a valid new line. Keep looking
242
262
continue
243
263
}
@@ -248,21 +268,22 @@ extension AsyncBufferSequence {
248
268
continue
249
269
}
250
270
return result
251
- case lineSeparator , paragraphSeparator :
271
+ case lineSeparator1 , paragraphSeparator1 :
252
272
var targetIndex = currentIndex
253
273
if Encoding . CodeUnit. self is UInt8 . Type {
254
- // For UTF8, look for the next 0x80 byte
274
+ // For UTF8, look for the next byte
255
275
guard ( targetIndex + 1 ) < self . buffer. count,
256
- self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0x80 ) else {
276
+ self . buffer [ targetIndex + 1 ] == lineSeparator2 ||
277
+ self . buffer [ targetIndex + 1 ] == paragraphSeparator2 else {
257
278
// Not a valid new line. Keep looking
258
279
continue
259
280
}
260
- // Swallow 0x80 byte
281
+ // Swallow next byte
261
282
targetIndex += 1
262
- // Look for the final 0xA8 (lineSeparator) or 0xA9 (paragraphSeparator)
283
+ // Look for the final byte
263
284
guard ( targetIndex + 1 ) < self . buffer. count,
264
- ( self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0xA8 ) ||
265
- self . buffer [ targetIndex + 1 ] == Encoding . CodeUnit ( 0xA9 ) ) else {
285
+ ( self . buffer [ targetIndex + 1 ] == lineSeparator3 ||
286
+ self . buffer [ targetIndex + 1 ] == paragraphSeparator3 ) else {
266
287
// Not a valid new line. Keep looking
267
288
continue
268
289
}
@@ -308,9 +329,8 @@ extension AsyncBufferSequence.LineSequence {
308
329
/// on the number of buffered elements (line length).
309
330
case unbounded
310
331
/// Impose a max buffer size (line length) limit.
311
- /// When using this policy, `LineSequence` will return a line if:
312
- /// - A newline character is encountered (standard behavior)
313
- /// - The current line in the buffer reaches or exceeds the specified maximum length
332
+ /// Subprocess **will throw an error** if the number of buffered
333
+ /// elements (line length) exceeds the limit
314
334
case maxLineLength( Int )
315
335
}
316
336
}
0 commit comments