7575#define SCAN_NOSKIP 0x1 /* Don't skip blanks. */
7676#define SCAN_SUPPRESS 0x2 /* Suppress assignment. */
7777#define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */
78- #define SCAN_WIDTH 0x8 /* A width value was supplied. */
7978
8079#define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */
8180#define SCAN_NODIGITS 0x20 /* No digits have been scanned. */
@@ -299,21 +298,18 @@ static void ReleaseCharSet(CharSet *cset)
299298 *
300299 *----------------------------------------------------------------------
301300*/
302- static int ValidateFormat (const zend_string * zstr_format , uint32_t format_arg_num , uint32_t numVars , uint32_t * totalSubs )
301+ static bool isFormatStringValid (const zend_string * zstr_format , uint32_t format_arg_num , uint32_t numVars , uint32_t * totalSubs )
303302{
304303#define STATIC_LIST_SIZE 16
305- int flags ;
306304 bool gotXpg = false;
307305 bool gotSequential = false;
308- const char * ch = NULL ;
309306 uint32_t staticAssign [STATIC_LIST_SIZE ];
310307 uint32_t * nassign = staticAssign ;
311308 uint32_t objIndex = 0 ;
312309 uint32_t xpgSize = 0 ;
313310 uint32_t nspace = STATIC_LIST_SIZE ;
314- zend_ulong value ;
311+ bool bindToVariables = numVars ;
315312
316- bool assignToVariables = numVars ;
317313 /*
318314 * Initialize an array that records the number of times a variable
319315 * is assigned to by the format string. We use this to detect if
@@ -325,100 +321,129 @@ static int ValidateFormat(const zend_string *zstr_format, uint32_t format_arg_nu
325321 }
326322 memset (nassign , 0 , sizeof (uint32_t )* numVars );
327323
328- const char * format = ZSTR_VAL (zstr_format );
329- while (* format != '\0' ) {
330- ch = format ++ ;
331- flags = 0 ;
332-
333- if (* ch != '%' ) {
324+ const char * end_ptr = ZSTR_VAL (zstr_format ) + ZSTR_LEN (zstr_format );
325+ for (const char * ptr = ZSTR_VAL (zstr_format ); ptr < end_ptr ; ptr ++ ) {
326+ bool isCurrentSpecifierBound = true;
327+ /* Look for specifier start */
328+ if (* ptr != '%' ) {
334329 continue ;
335330 }
336- ch = format ++ ;
337- if (* ch == '%' ) {
338- continue ;
331+ ptr ++ ;
332+
333+ if (UNEXPECTED (ptr == end_ptr )) {
334+ zend_argument_value_error (format_arg_num , "unterminated format specifier" );
335+ goto error ;
339336 }
340- if (* ch == '*' ) {
341- flags |= SCAN_SUPPRESS ;
342- ch = format ++ ;
343- goto xpgCheckDone ;
337+
338+ /* Literal % so continue */
339+ if (* ptr == '%' ) {
340+ continue ;
341+ } else if (* ptr == '*' ) {
342+ /* Consumed specifier but not assigned to variable */
343+ isCurrentSpecifierBound = false;
344+ ptr ++ ;
345+ if (UNEXPECTED (ptr == end_ptr )) {
346+ zend_argument_value_error (format_arg_num , "unterminated format specifier" );
347+ goto error ;
348+ }
344349 }
345350
346- if ( isdigit ( (int )* ch ) ) {
347- /*
348- * Check for an XPG3-style %n$ specification. Note: there
349- * must not be a mixture of XPG3 specs and non-XPG3 specs
351+ if (isdigit (* ptr )) {
352+ /* We might either have an XPG3-style %n$ specification,
353+ * or we are parsing the _maximum field width_ of the specifier.
354+ *
355+ * Note: there must not be a mixture of XPG3 specs and non-XPG3 specs
350356 * in the same format string.
351357 */
352- char * end = NULL ;
353- value = ZEND_STRTOUL (format - 1 , & end , 10 );
354- if (* end != '$' ) {
355- goto notXpg ;
356- }
357- format = end + 1 ;
358- ch = format ++ ;
359- gotXpg = true;
360- if (gotSequential ) {
361- goto mixedXPG ;
358+
359+ char * width_end = NULL ;
360+ zend_ulong width = ZEND_STRTOUL (ptr , & width_end , 10 );
361+ if (UNEXPECTED (width_end == end_ptr )) {
362+ zend_argument_value_error (format_arg_num , "unterminated format specifier" );
363+ goto error ;
362364 }
363- if ((value < 1 ) || (assignToVariables && (value > numVars ))) {
364- goto badIndex ;
365- } else if (!assignToVariables ) {
366- /*
367- * In the case where no vars are specified, the user can
368- * specify %9999$ legally, so we have to consider special
369- * rules for growing the assign array. 'value' is
370- * guaranteed to be > 0.
371- */
365+ ptr = width_end ;
372366
373- /* set a lower artificial limit on this
374- * in the interest of security and resource friendliness
375- * 255 arguments should be more than enough. - cc
376- */
377- if (value > SCAN_MAX_ARGS ) {
378- goto badIndex ;
367+ if (* ptr != '$' && isCurrentSpecifierBound ) {
368+ gotSequential = true;
369+ } else {
370+ /* We indeed have an XPG3 style spec */
371+ ptr ++ ;
372+ if (UNEXPECTED (ptr == end_ptr )) {
373+ zend_argument_value_error (format_arg_num , "unterminated format specifier" );
374+ goto error ;
375+ }
376+ if (UNEXPECTED (gotSequential )) {
377+ zend_argument_value_error (format_arg_num , "cannot mix \"%%\" and \"%%n$\" conversion specifiers" );
378+ goto error ;
379379 }
380380
381- xpgSize = (xpgSize > value ) ? xpgSize : value ;
382- }
383- objIndex = value - 1 ;
384- goto xpgCheckDone ;
385- }
381+ zend_ulong argument_index = width ;
382+ gotXpg = true;
386383
387- notXpg :
388- gotSequential = true;
389- if (gotXpg ) {
390- mixedXPG :
391- zend_value_error ("%s" , "cannot mix \"%\" and \"%n$\" conversion specifiers" );
392- goto error ;
393- }
384+ if (UNEXPECTED (argument_index < 1 || (bindToVariables && argument_index > numVars ))) {
385+ zend_argument_value_error (format_arg_num , "argument index %%%" ZEND_ULONG_FMT_SPEC "$ is out of range" , argument_index );
386+ goto error ;
387+ } else if (!bindToVariables ) {
388+ /*
389+ * In the case where no vars are specified, the user can
390+ * specify %9999$ legally, so we have to consider special
391+ * rules for growing the assign array. 'value' is
392+ * guaranteed to be > 0.
393+ */
394394
395- xpgCheckDone :
396- /*
397- * Parse any width specifier.
398- */
399- if (isdigit (UCHAR (* ch ))) {
400- char * end = NULL ;
401- value = ZEND_STRTOUL (format - 1 , & end , 10 );
402- flags |= SCAN_WIDTH ;
403- format = end ;
404- ch = format ++ ;
395+ /* set a lower artificial limit on this
396+ * in the interest of security and resource friendliness
397+ * 255 arguments should be more than enough. - cc
398+ */
399+ if (argument_index > SCAN_MAX_ARGS ) {
400+ // TODO Specify limit?
401+ zend_argument_value_error (format_arg_num , "argument index %%%" ZEND_ULONG_FMT_SPEC "$ is out of range" , argument_index );
402+ goto error ;
403+ }
404+
405+ xpgSize = MAX (argument_index , xpgSize );
406+ }
407+ objIndex = argument_index - 1 ;
408+
409+ /* Check if we have a width argument with the XPG3 specifier */
410+ if (!isdigit (* ptr )) {
411+ goto length_modifier ;
412+ }
413+ /* Grab the width to be able to continue */
414+ width = ZEND_STRTOUL (ptr , & width_end , 10 );
415+ if (UNEXPECTED (width_end == end_ptr )) {
416+ zend_argument_value_error (format_arg_num , "unterminated format specifier" );
417+ goto error ;
418+ }
419+ ptr = width_end ;
420+ }
421+ } else if (isCurrentSpecifierBound ) {
422+ if (UNEXPECTED (gotXpg )) {
423+ zend_argument_value_error (format_arg_num , "cannot mix \"%%\" and \"%%n$\" conversion specifiers" );
424+ goto error ;
425+ }
426+ gotSequential = true;
405427 }
406428
407- /*
408- * Ignore size specifier.
409- */
410- if ((* ch == 'l' ) || (* ch == 'L' ) || (* ch == 'h' )) {
411- ch = format ++ ;
429+ length_modifier :
430+ /* Ignore length modifier */
431+ if (* ptr == 'l' || * ptr == 'L' || * ptr == 'h' ) {
432+ ptr ++ ;
433+ if (UNEXPECTED (ptr == end_ptr )) {
434+ zend_argument_value_error (format_arg_num , "unterminated format specifier" );
435+ goto error ;
436+ }
412437 }
413438
414- if (!(flags & SCAN_SUPPRESS ) && assignToVariables && (objIndex >= numVars )) {
415- goto badIndex ;
439+ if (isCurrentSpecifierBound && bindToVariables && (objIndex >= numVars )) {
440+ zend_argument_value_error (format_arg_num , "Different numbers of variable names and field specifiers" );
441+ goto error ;
416442 }
417443
418- /*
419- * Handle the various field types.
420- */
421- switch (* ch ) {
444+ /* Handle specifiers */
445+ ZEND_ASSERT (ptr != end_ptr );
446+ switch (* ptr ) {
422447 case 'n' :
423448 case 'd' :
424449 case 'D' :
@@ -440,48 +465,44 @@ static int ValidateFormat(const zend_string *zstr_format, uint32_t format_arg_nu
440465 /* ANSI. since Zend auto allocates space for vars, this is no */
441466 /* problem - cc */
442467 /*
443- if (flags & SCAN_WIDTH ) {
444- php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
468+ if (hasFieldWidth ) {
469+ php_error_docref(NULL, E_WARNING, "Field width may not be specified in %% c conversion");
445470 goto error;
446471 }
447472 */
448473 break ;
449474
475+ /* Range specifier */
450476 case '[' :
451- if ( * format == '\0' ) {
452- goto badSet ;
453- }
454- ch = format ++ ;
455- if (* ch == '^' ) {
456- if ( * format == '\0' ) {
457- goto badSet ;
477+ ptr ++ ;
478+ /* Not match flag */
479+ if ( * ptr == '^' ) {
480+ ptr ++ ;
481+ if (UNEXPECTED ( ptr == end_ptr ) ) {
482+ zend_argument_value_error ( format_arg_num , "unterminated [ format specifier" );
483+ goto error ;
458484 }
459- ch = format ++ ;
460485 }
461- if (* ch == ']' ) {
462- if (* format == '\0' ) {
463- goto badSet ;
464- }
465- ch = format ++ ;
486+ /* If ] is the first character of the range it means it should be *included*
487+ * in the range and not mark the end of it (as it would be empty otherwise) */
488+ if (* ptr == ']' ) {
489+ ptr ++ ;
466490 }
467- while (* ch != ']' ) {
468- if (* format == '\0' ) {
469- goto badSet ;
491+ while (* ptr != ']' ) {
492+ if (UNEXPECTED (ptr == end_ptr )) {
493+ zend_argument_value_error (format_arg_num , "unterminated [ format specifier" );
494+ goto error ;
470495 }
471- ch = format ++ ;
496+ ptr ++ ;
472497 }
473498 break ;
474- badSet :
475- zend_value_error ("Unmatched [ in format string" );
476- goto error ;
477499
478- default : {
479- zend_value_error ( "Bad scan conversion character \"%c\"" , * ch );
500+ default :
501+ zend_argument_value_error ( format_arg_num , "unknown format specifier \"%c\"" , * ptr );
480502 goto error ;
481- }
482503 }
483504
484- if (!( flags & SCAN_SUPPRESS ) ) {
505+ if (isCurrentSpecifierBound ) {
485506 if (objIndex >= nspace ) {
486507 /*
487508 * Expand the nassign buffer. If we are using XPG specifiers,
@@ -509,12 +530,12 @@ static int ValidateFormat(const zend_string *zstr_format, uint32_t format_arg_nu
509530 nassign [objIndex ]++ ;
510531 objIndex ++ ;
511532 }
512- } /* while (*format != '\0') */
533+ }
513534
514535 /*
515536 * Verify that all of the variable were assigned exactly once.
516537 */
517- if (!assignToVariables ) {
538+ if (!bindToVariables ) {
518539 if (xpgSize ) {
519540 numVars = xpgSize ;
520541 } else {
@@ -526,35 +547,38 @@ static int ValidateFormat(const zend_string *zstr_format, uint32_t format_arg_nu
526547
527548 for (uint32_t i = 0 ; i < numVars ; i ++ ) {
528549 if (nassign [i ] > 1 ) {
529- zend_value_error ("%s" , "Variable is assigned by multiple \"%n$\" conversion specifiers" );
550+ if (bindToVariables ) {
551+ /* +1 as arguments are 1-indexed not 0-indexed */
552+ zend_argument_value_error (i + 1 + format_arg_num , "is assigned by multiple \"%%n$\" conversion specifiers" );
553+ } else {
554+ zend_argument_value_error (format_arg_num , "argument %" PRIu32 " is assigned by multiple \"%%n$\" conversion specifiers" , i + 1 );
555+ }
530556 goto error ;
531557 } else if (!xpgSize && (nassign [i ] == 0 )) {
532558 /*
533559 * If the space is empty, and xpgSize is 0 (means XPG wasn't
534560 * used, and/or numVars != 0), then too many vars were given
535561 */
536- zend_value_error ("Variable is not assigned by any conversion specifiers" );
562+ if (bindToVariables ) {
563+ /* +1 as arguments are 1-indexed not 0-indexed */
564+ zend_argument_value_error (i + 1 + format_arg_num , "is not assigned by any conversion specifiers" );
565+ } else {
566+ zend_argument_value_error (format_arg_num , "argument %" PRIu32 " is not assigned by any conversion specifiers" , i + 1 );
567+ }
537568 goto error ;
538569 }
539570 }
540571
541572 if (nassign != staticAssign ) {
542- efree ((char * )nassign );
543- }
544- return SCAN_SUCCESS ;
545-
546- badIndex :
547- if (gotXpg ) {
548- zend_value_error ("%s" , "\"%n$\" argument index out of range" );
549- } else {
550- zend_value_error ("Different numbers of variable names and field specifiers" );
573+ efree (nassign );
551574 }
575+ return true;
552576
553577error :
554578 if (nassign != staticAssign ) {
555- efree (( char * ) nassign );
579+ efree (nassign );
556580 }
557- return SCAN_ERROR_INVALID_FORMAT ;
581+ return false ;
558582#undef STATIC_LIST_SIZE
559583}
560584/* }}} */
@@ -602,8 +626,8 @@ PHPAPI int php_sscanf_internal(const char *string, const zend_string *zstr_forma
602626 * Check for errors in the format string.
603627 */
604628 uint32_t totalVars = 0 ;
605- if ( ValidateFormat ( zstr_format , format_arg_num , numVars , & totalVars ) != SCAN_SUCCESS ) {
606- scan_set_error_return ( assignToVariables , return_value );
629+ /* Throws when format is invalid */
630+ if (! isFormatStringValid ( zstr_format , format_arg_num , numVars , & totalVars )) {
607631 return SCAN_ERROR_INVALID_FORMAT ;
608632 }
609633
0 commit comments