@@ -6529,7 +6529,6 @@ PP(pp_unshift)
65296529 return NORMAL ;
65306530}
65316531
6532-
65336532PP_wrapped (pp_reverse , 0 , 1 )
65346533{
65356534 dSP ; dMARK ;
@@ -6555,15 +6554,17 @@ PP_wrapped(pp_reverse, 0, 1)
65556554 SV * begin , * end ;
65566555
65576556 if (can_preserve ) {
6558- if (!av_exists (av , i )) {
6559- if (av_exists (av , j )) {
6557+ bool exists_i = av_exists (av , i );
6558+ bool exists_j = av_exists (av , j );
6559+ if (!exists_i ) {
6560+ if (exists_j ) {
65606561 SV * sv = av_delete (av , j , 0 );
65616562 begin = * av_fetch (av , i , TRUE);
65626563 sv_setsv_mg (begin , sv );
65636564 }
65646565 continue ;
65656566 }
6566- else if (!av_exists ( av , j ) ) {
6567+ else if (!exists_j ) {
65676568 SV * sv = av_delete (av , i , 0 );
65686569 end = * av_fetch (av , j , TRUE);
65696570 sv_setsv_mg (end , sv );
@@ -6644,18 +6645,20 @@ PP_wrapped(pp_reverse, 0, 1)
66446645 * in a single pass, rather than 2-3 passes. */
66456646
66466647 const char * src = SvPV_const (src_sv , len );
6648+ char * tb ;
66476649
66486650 /* Prepare the TARG. */
6651+ SV_CHECK_THINKFIRST_COW_DROP (TARG ); /* Drops any buffer */
66496652 if (SvTYPE (TARG ) < SVt_PV ) {
66506653 SvUPGRADE (TARG , SvTYPE (src_sv )); /* No buffer allocation here */
6651- } else if ( SvTHINKFIRST ( TARG )) {
6652- SV_CHECK_THINKFIRST_COW_DROP (TARG ); /* Drops any buffer */
6654+ } else {
6655+ SvSETMAGIC (TARG );
66536656 }
6654- SvSETMAGIC ( TARG );
6655- SvGROW (TARG , len + 1 );
6657+
6658+ tb = SvGROW (TARG , len + 1 );
66566659 SvCUR_set (TARG , len );
66576660 SvPOK_only (TARG );
6658- * SvEND ( TARG ) = '\0' ;
6661+ tb [ len ] = '\0' ;
66596662 if (SvTAINTED (src_sv ))
66606663 SvTAINT (TARG );
66616664
@@ -6679,10 +6682,69 @@ PP_wrapped(pp_reverse, 0, 1)
66796682 }
66806683 }
66816684 } else {
6685+ STRLEN i = 0 ;
6686+ STRLEN j = len ;
6687+ uint32_t u32_1 , u32_2 ;
6688+ uint16_t u16_1 , u16_2 ;
66826689 char * outp = SvPVX (TARG );
6683- const char * p = src + len ;
6684- while (p != src )
6685- * outp ++ = * -- p ;
6690+ /* Take a chunk of bytes from the front and from the
6691+ * back, reverse the bytes in each and swap the
6692+ * chunks over. This should have generally good
6693+ * performance but also is likely to be optimised
6694+ * into bswap instructions by the compiler.
6695+ */
6696+ #ifdef HAS_QUAD
6697+ uint64_t u64_1 , u64_2 ;
6698+ while (j - i >= 16 ) {
6699+ memcpy (& u64_1 , src + j - 8 , 8 );
6700+ memcpy (& u64_2 , src + i , 8 );
6701+ u64_1 = _swab_64_ (u64_1 );
6702+ u64_2 = _swab_64_ (u64_2 );
6703+ memcpy (outp + j - 8 , & u64_2 , 8 );
6704+ memcpy (outp + i , & u64_1 , 8 );
6705+ i += 8 ;
6706+ j -= 8 ;
6707+ }
6708+
6709+ if (j - i >= 8 ) {
6710+ memcpy (& u32_1 , src + j - 4 , 4 );
6711+ memcpy (& u32_2 , src + i , 4 );
6712+ u32_1 = _swab_32_ (u32_1 );
6713+ u32_2 = _swab_32_ (u32_2 );
6714+ memcpy (outp + j - 4 , & u32_2 , 4 );
6715+ memcpy (outp + i , & u32_1 , 4 );
6716+ i += 4 ;
6717+ j -= 4 ;
6718+ }
6719+ #else
6720+ while (j - i >= 8 ) {
6721+ memcpy (& u32_1 , src + j - 4 , 4 );
6722+ memcpy (& u32_2 , src + i , 4 );
6723+ u32_1 = _swab_32_ (u32_1 );
6724+ u32_2 = _swab_32_ (u32_2 );
6725+ memcpy (outp + j - 4 , & u32_2 , 4 );
6726+ memcpy (outp + i , & u32_1 , 4 );
6727+ i += 4 ;
6728+ j -= 4 ;
6729+ }
6730+ #endif
6731+ if (j - i >= 4 ) {
6732+ memcpy (& u16_1 , src + j - 2 , 2 );
6733+ memcpy (& u16_2 , src + i , 2 );
6734+ u16_1 = _swab_16_ (u16_1 );
6735+ u16_2 = _swab_16_ (u16_2 );
6736+ memcpy (outp + j - 2 , & u16_2 , 2 );
6737+ memcpy (outp + i , & u16_1 , 2 );
6738+ i += 2 ;
6739+ j -= 2 ;
6740+ }
6741+
6742+ /* Swap any remaining bytes one by one. */
6743+ while (i < j ) {
6744+ outp [i ] = src [j - 1 ];
6745+ outp [j - 1 ] = src [i ];
6746+ i ++ ; j -- ;
6747+ }
66866748 }
66876749 RETURN ;
66886750 }
@@ -6695,8 +6757,8 @@ PP_wrapped(pp_reverse, 0, 1)
66956757
66966758 if (len > 1 ) {
66976759 /* The traditional way, operate on the current byte buffer */
6698- char * down ;
66996760 if (DO_UTF8 (TARG )) { /* first reverse each character */
6761+ char * down ;
67006762 U8 * s = (U8 * )SvPVX (TARG );
67016763 const U8 * send = (U8 * )(s + len );
67026764 while (s < send ) {
@@ -6720,11 +6782,64 @@ PP_wrapped(pp_reverse, 0, 1)
67206782 }
67216783 up = SvPVX (TARG );
67226784 }
6723- down = SvPVX (TARG ) + len - 1 ;
6724- while (down > up ) {
6725- const char tmp = * up ;
6726- * up ++ = * down ;
6727- * down -- = tmp ;
6785+ STRLEN i = 0 ;
6786+ STRLEN j = len ;
6787+ uint32_t u32_1 , u32_2 ;
6788+ uint16_t u16_1 , u16_2 ;
6789+ /* Reverse the buffer in place, in chunks where possible */
6790+ #ifdef HAS_QUAD
6791+ uint64_t u64_1 , u64_2 ;
6792+ while (j - i >= 16 ) {
6793+ memcpy (& u64_1 , up + j - 8 , 8 );
6794+ memcpy (& u64_2 , up + i , 8 );
6795+ u64_1 = _swab_64_ (u64_1 );
6796+ u64_2 = _swab_64_ (u64_2 );
6797+ memcpy (up + j - 8 , & u64_2 , 8 );
6798+ memcpy (up + i , & u64_1 , 8 );
6799+ i += 8 ;
6800+ j -= 8 ;
6801+ }
6802+
6803+ if (j - i >= 8 ) {
6804+ memcpy (& u32_1 , up + j - 4 , 4 );
6805+ memcpy (& u32_2 , up + i , 4 );
6806+ u32_1 = _swab_32_ (u32_1 );
6807+ u32_2 = _swab_32_ (u32_2 );
6808+ memcpy (up + j - 4 , & u32_2 , 4 );
6809+ memcpy (up + i , & u32_1 , 4 );
6810+ i += 4 ;
6811+ j -= 4 ;
6812+ }
6813+ #else
6814+ while (j - i >= 8 ) {
6815+ memcpy (& u32_1 , up + j - 4 , 4 );
6816+ memcpy (& u32_2 , up + i , 4 );
6817+ u32_1 = _swab_32_ (u32_1 );
6818+ u32_2 = _swab_32_ (u32_2 );
6819+ memcpy (up + j - 4 , & u32_2 , 4 );
6820+ memcpy (up + i , & u32_1 , 4 );
6821+ i += 4 ;
6822+ j -= 4 ;
6823+ }
6824+ #endif
6825+ if (j - i >= 4 ) {
6826+ memcpy (& u16_1 , up + j - 2 , 2 );
6827+ memcpy (& u16_2 , up + i , 2 );
6828+ u16_1 = _swab_16_ (u16_1 );
6829+ u16_2 = _swab_16_ (u16_2 );
6830+ memcpy (up + j - 2 , & u16_2 , 2 );
6831+ memcpy (up + i , & u16_1 , 2 );
6832+ i += 2 ;
6833+ j -= 2 ;
6834+ }
6835+
6836+ /* Finally, swap any remaining bytes one-by-one. */
6837+ while (i < j ) {
6838+ unsigned char tmp = up [i ];
6839+ up [i ] = up [j - 1 ];
6840+ up [j - 1 ] = tmp ;
6841+ i ++ ;
6842+ j -- ;
67286843 }
67296844 }
67306845 (void )SvPOK_only_UTF8 (TARG );
0 commit comments