@@ -267,8 +267,8 @@ namespace csv {
267
267
return CSV_NOT_FOUND;
268
268
}
269
269
270
- void CSVReader::feed (std::unique_ptr< char []> && buff) {
271
- this ->feed (csv::string_view (buff.get ()) );
270
// Feeds one queued work item to the parser by forwarding to feed(csv::string_view).
// The view is built from the item's buffer pointer (buff.first) and an explicit
// length (buff.second); the overload this replaces passed only the pointer.
// The producer side stores the number of characters written into the buffer as
// the pair's second member, so that is presumably what the length means here.
// NOTE(review): a pair whose first member is null is used elsewhere in this file
// as the termination signal, and the consumer loop checks for it before calling
// this overload; this function itself performs no null check.
+ void CSVReader::feed (WorkItem && buff) {
271
+ this ->feed ( csv::string_view (buff.first . get (), buff. second ) );
272
272
}
273
273
274
274
void CSVReader::feed (csv::string_view in) {
@@ -296,56 +296,72 @@ namespace csv {
296
296
this ->record_buffer ->reserve (in.size ());
297
297
std::string& _record_buffer = *(this ->record_buffer .get ());
298
298
299
- for (size_t i = 0 ; i < in.size (); i++) {
300
- if (!quote_escape) {
301
- switch (this ->parse_flags [in[i] + 128 ]) {
302
- case NOT_SPECIAL:
303
- _record_buffer +=in[i];
304
- break ;
299
+ const size_t in_size = in.size ();
300
+ for (size_t i = 0 ; i < in_size; i++) {
301
+ switch (this ->parse_flags [in[i] + 128 ]) {
305
302
case DELIMITER:
306
- this ->split_buffer .push_back (this ->record_buffer .size ());
307
- break ;
303
+ if (!quote_escape) {
304
+ this ->split_buffer .push_back (this ->record_buffer .size ());
305
+ break ;
306
+ }
308
307
case NEWLINE:
309
- // End of record -> Write record
310
- if (i + 1 < in.size () && in[i + 1 ] == ' \n ' ) // Catches CRLF (or LFLF)
311
- ++i;
312
- this ->write_record ();
313
- break ;
314
- default : // Quote
315
- // Case: Previous character was delimiter or newline
316
- if (i) { // Don't deref past beginning
317
- auto prev_ch = this ->parse_flags [in[i - 1 ] + 128 ];
318
- if (prev_ch >= DELIMITER) quote_escape = true ;
308
+ if (!quote_escape) {
309
+ // End of record -> Write record
310
+ if (i + 1 < in_size && in[i + 1 ] == ' \n ' ) // Catches CRLF (or LFLF)
311
+ ++i;
312
+ this ->write_record ();
313
+ break ;
319
314
}
315
+ case NOT_SPECIAL: {
316
+ // Optimization: Since NOT_SPECIAL characters tend to occur in contiguous
317
+ // sequences, use the loop below to avoid having to go through the outer
318
+ // switch statement as much as possible
319
+ #if __cplusplus >= 201703L
320
+ size_t start = i;
321
+ while (i + 1 < in_size && this ->parse_flags [in[i + 1 ] + 128 ] == NOT_SPECIAL) {
322
+ i++;
323
+ }
324
+
325
+ _record_buffer += in.substr (start, i - start + 1 );
326
+ #else
327
+ _record_buffer += in[i];
328
+
329
+ while (i + 1 < in_size && this ->parse_flags [in[i + 1 ] + 128 ] == NOT_SPECIAL) {
330
+ _record_buffer += in[++i];
331
+ }
332
+ #endif
333
+
320
334
break ;
321
335
}
322
- }
323
- else {
324
- switch (this ->parse_flags [in[i] + 128 ]) {
325
- case NOT_SPECIAL:
326
- case DELIMITER:
327
- case NEWLINE:
328
- // Treat as a regular character
329
- _record_buffer +=in[i];
330
- break ;
331
336
default : // Quote
337
+ if (!quote_escape) {
338
+ // Don't deref past beginning
339
+ if (i && this ->parse_flags [in[i - 1 ] + 128 ] >= DELIMITER) {
340
+ // Case: Previous character was delimiter or newline
341
+ quote_escape = true ;
342
+ }
343
+
344
+ break ;
345
+ }
346
+
332
347
auto next_ch = this ->parse_flags [in[i + 1 ] + 128 ];
333
348
if (next_ch >= DELIMITER) {
334
349
// Case: Delim or newline => end of field
335
350
quote_escape = false ;
351
+ break ;
336
352
}
337
- else {
338
- // Case: Escaped quote
339
- _record_buffer +=in[i];
340
-
341
- if (next_ch == QUOTE)
342
- ++i; // Case: Two consecutive quotes
343
- else if (this ->strict )
344
- throw std::runtime_error (" Unescaped single quote around line " +
345
- std::to_string (this ->correct_rows ) + " near:\n " +
346
- std::string (in.substr (i, 100 )));
347
- }
348
- }
353
+
354
+ // Case: Escaped quote
355
+ _record_buffer += in[i];
356
+
357
+ if (next_ch == QUOTE)
358
+ ++i; // Case: Two consecutive quotes
359
+ else if (this ->strict )
360
+ throw std::runtime_error (" Unescaped single quote around line " +
361
+ std::to_string (this ->correct_rows ) + " near:\n " +
362
+ std::string (in.substr (i, 100 )));
363
+
364
+ break ;
349
365
}
350
366
}
351
367
@@ -415,7 +431,7 @@ namespace csv {
415
431
this ->feed_buffer .pop_front ();
416
432
417
433
// Nullptr --> Die
418
- if (!in) break ;
434
+ if (!in. first ) break ;
419
435
420
436
lock.unlock (); // Release lock
421
437
this ->feed (std::move (in));
@@ -455,11 +471,12 @@ namespace csv {
455
471
char * result = std::fgets (line_buffer, internals::PAGE_SIZE, this ->infile );
456
472
if (result == NULL ) break ;
457
473
line_buffer += std::strlen (line_buffer);
474
+ size_t current_strlen = line_buffer - buffer.get ();
458
475
459
- if ((line_buffer - buffer. get ()) >= 0.9 * BUFFER_UPPER_LIMIT) {
476
+ if (current_strlen >= 0.9 * BUFFER_UPPER_LIMIT) {
460
477
processed += (line_buffer - buffer.get ());
461
478
std::unique_lock<std::mutex> lock{ this ->feed_lock };
462
- this ->feed_buffer .push_back (std::move (buffer));
479
+ this ->feed_buffer .push_back (std::make_pair<>( std:: move (buffer), current_strlen ));
463
480
this ->feed_cond .notify_one ();
464
481
465
482
buffer = std::unique_ptr<char []>(new char [BUFFER_UPPER_LIMIT]); // New pointer
@@ -470,8 +487,8 @@ namespace csv {
470
487
471
488
// Feed remaining bits
472
489
std::unique_lock<std::mutex> lock{ this ->feed_lock };
473
- this ->feed_buffer .push_back (std::move (buffer));
474
- this ->feed_buffer .push_back (nullptr ); // Termination signal
490
+ this ->feed_buffer .push_back (std::make_pair<>( std:: move (buffer), line_buffer - buffer. get () ));
491
+ this ->feed_buffer .push_back (std::make_pair<>( nullptr , 0 ) ); // Termination signal
475
492
this ->feed_cond .notify_one ();
476
493
lock.unlock ();
477
494
worker.join ();
0 commit comments