From 03722246d10c08c5ea6c543b75888ca9707c9a25 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 21:46:00 +0000 Subject: [PATCH 01/11] Various functions - when a field is already zero, don't set it to zero. SV bodies are Zero()ed when allocated/uprooted from an arena for use. This commit changes instances where a fresh body field is unnecessarily assigned a zero/NULL value into an assertion that the field already contains the desired value. --- gv.c | 2 +- hv.c | 2 +- op.c | 2 +- pp_ctl.c | 2 +- regcomp_invlist.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gv.c b/gv.c index 9856e03a2b7a..b640979efed7 100644 --- a/gv.c +++ b/gv.c @@ -669,7 +669,7 @@ S_maybe_add_coresub(pTHX_ HV * const stash, GV *gv, get hairy. */ cv = MUTABLE_CV(newSV_type(SVt_PVCV)); GvCV_set(gv,cv); - GvCVGEN(gv) = 0; + assert(GvCVGEN(gv) == 0); CvISXSUB_on(cv); CvXSUB(cv) = core_xsub; PoisonPADLIST(cv); diff --git a/hv.c b/hv.c index 11e8c3eafd6c..7959ec10acfb 100644 --- a/hv.c +++ b/hv.c @@ -3570,7 +3570,7 @@ S_refcounted_he_value(pTHX_ const struct refcounted_he *he) SvPV_set(value, (char *) he->refcounted_he_data + 1); SvCUR_set(value, he->refcounted_he_val.refcounted_he_u_len); /* This stops anything trying to free it */ - SvLEN_set(value, 0); + assert(SvLEN(value) == 0); SvPOK_on(value); SvREADONLY_on(value); if ((he->refcounted_he_data[0] & HVrhek_typemask) == HVrhek_PV_UTF8) diff --git a/op.c b/op.c index 890acc37285f..1d01d0f203ce 100644 --- a/op.c +++ b/op.c @@ -12320,7 +12320,7 @@ Perl_newSTUB(pTHX_ GV *gv, bool fake) PERL_ARGS_ASSERT_NEWSTUB; assert(!GvCVu(gv)); GvCV_set(gv, cv); - GvCVGEN(gv) = 0; + assert(GvCVGEN(gv) == 0); if (!fake && GvSTASH(gv) && HvENAME_HEK(GvSTASH(gv))) gv_method_changed(gv); if (SvFAKE(gv)) { diff --git a/pp_ctl.c b/pp_ctl.c index 90853e010029..6516005c8348 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -3753,7 +3753,7 @@ S_save_lines(pTHX_ AV *array, SV *sv) sv_setpvn_fresh(tmpstr, s, t - s); /* not breakable until we compile a COP for it */ - SvIV_set(tmpstr, 0); + assert(SvIVX(tmpstr) == 0); SvIOK_on(tmpstr); av_store(array, line++, tmpstr); s = t; diff --git a/regcomp_invlist.c b/regcomp_invlist.c index 658ece56c960..247c0ab56d4e 100644 --- a/regcomp_invlist.c +++ b/regcomp_invlist.c @@ -357,8 +357,8 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list) * of the list proper, so start it just after them */ SvPV_set(invlist, (char *) (list + HEADER_LENGTH)); - SvLEN_set(invlist, 0); /* Means we own the contents, and the system - shouldn't touch it */ + assert(SvLEN(invlist) == 0); /* Means we own the contents, and the system + shouldn't touch it */ *(get_invlist_offset_addr(invlist)) = offset; From 08e879e22d7098f52271e9e4c88661bb3c7db3cc Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 21:52:36 +0000 Subject: [PATCH 02/11] newSVbool/_true/_false - assign the required values directly. Prior to this commit, `newSVbool`, `newSV_true`, and `newSV_false` used `newSVsv`, but that is less efficient than directly setting up the new SV as it needs to be. --- sv.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/sv.c b/sv.c index af145738def1..61a71d250452 100644 --- a/sv.c +++ b/sv.c @@ -10563,9 +10563,7 @@ SV * Perl_newSVbool(pTHX_ bool bool_val) { PERL_ARGS_ASSERT_NEWSVBOOL; - SV *sv = newSVsv(bool_val ? &PL_sv_yes : &PL_sv_no); - - return sv; + return (bool_val ? newSV_true() : newSV_false()); } /* @@ -10579,7 +10577,18 @@ SV * Perl_newSV_true(pTHX) { PERL_ARGS_ASSERT_NEWSV_TRUE; - SV *sv = newSVsv(&PL_sv_yes); + + /* Equivalent to: SV *sv = newSVsv(&PL_sv_yes); */ + SV *sv; + new_SV(sv); + SvFLAGS(sv) = SVt_PVNV|SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK + |SVf_POK|SVp_POK|SVf_IsCOW|SVppv_STATIC; + SvPV_set(sv, (char*)PL_Yes); + SvANY(sv) = new_XPVNV(); + SvCUR_set(sv, 1); + SvLEN_set(sv, 0); + SvIV_set(sv, 1); + SvNV_set(sv, 1); return sv; } @@ -10596,7 +10605,18 @@ SV * Perl_newSV_false(pTHX) { PERL_ARGS_ASSERT_NEWSV_FALSE; - SV *sv = newSVsv(&PL_sv_no); + + /* Equivalent to: SV *sv = newSVsv(&PL_sv_no); */ + SV *sv; + new_SV(sv); + SvFLAGS(sv) = SVt_PVNV|SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK + |SVf_POK|SVp_POK|SVf_IsCOW|SVppv_STATIC; + SvPV_set(sv, (char*)PL_No); + SvANY(sv) = new_XPVNV(); + SvCUR_set(sv, 0); + SvLEN_set(sv, 0); + SvIV_set(sv, 0); + SvNV_set(sv, 0); return sv; } From bf60df58bd814e0cfe6e5463edf065d5517aff73 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 22:00:27 +0000 Subject: [PATCH 03/11] Perl_regexec_flags - create and populate a new SV in one call, not two. Perl_regexec_flags had these lines to first create a new SV, then assign to it the value(s) of an existing SV: ``` reginfo->sv = newSV_type(SVt_NULL); SvSetSV_nosteal(reginfo->sv, sv); ``` This is two calls into _sv.c_ and, if the existing SV is `SvOK`, will incur an SV upgrade in the process. Those lines are preceded with the comment: `Not newSVsv, either, as it does not COW.` However, the underpinnings of `newSVsv` and variants do support COW nowadays, so we can now just do: ``` reginfo->sv = newSVsv_flags(sv, SV_GMAGIC|SV_NOSTEAL|SV_DO_COW_SVSETSV); ``` --- regexec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/regexec.c b/regexec.c index be9ec2cf338f..2016903a81a7 100644 --- a/regexec.c +++ b/regexec.c @@ -3863,10 +3863,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, S_cleanup_regmatch_info_aux has executed (registered by SAVEDESTRUCTOR_X below). S_cleanup_regmatch_info_aux modifies magic belonging to this SV. - Not newSVsv, either, as it does not COW. */ - reginfo->sv = newSV_type(SVt_NULL); - SvSetSV_nosteal(reginfo->sv, sv); + reginfo->sv = newSVsv_flags(sv, SV_GMAGIC|SV_NOSTEAL|SV_DO_COW_SVSETSV); SAVEFREESV(reginfo->sv); } From 2823f8bf21cd244168875acdd67c72aa7e58ea53 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 22:09:35 +0000 Subject: [PATCH 04/11] pp_index - create a SVt_PV directly, not via upgrade Rather than create an `SVt_NULL`, then immediately to `sv_upgrade` it (within the `sv_usepvn` call) to an SVt_PV, just create the SVt_PV in the first place. --- pp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pp.c b/pp.c index ed6155f8810b..a754dd64f754 100644 --- a/pp.c +++ b/pp.c @@ -3855,7 +3855,7 @@ PP(pp_index) * the routine that contains the new byte string, and donate it * to temp to ensure it will get free()d */ if (free_little_p) { - little = temp = newSV_type(SVt_NULL); + little = temp = newSV_type(SVt_PV); sv_usepvn(temp, (char *) little_p, llen); little_p = SvPVX_const(little); } From 37af5559c138b18e52c8c086223ddee839dc2449 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 22:17:30 +0000 Subject: [PATCH 05/11] Perl_newSV - entirely create the new SV within this function. Since `new_XPV()` was added, this function can handle both branches of `if (len)` without needing to call any other function. This should be slightly more efficient for both branches. --- sv.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sv.c b/sv.c index 61a71d250452..0993ef13b0ed 100644 --- a/sv.c +++ b/sv.c @@ -6275,10 +6275,14 @@ Perl_newSV(pTHX_ const STRLEN len) { SV *sv; - if (!len) - new_SV(sv); - else { - sv = newSV_type(SVt_PV); + new_SV(sv); + if (len) { + SvFLAGS(sv) = SVt_PV; + SvANY(sv) = new_XPV(); + + SvCUR_set(sv, 0); + SvLEN_set(sv, 0); + sv_grow_fresh(sv, len + 1); } return sv; From a63c25f29fbfc6bf801680ef2e43d5b8130c1649 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 22:21:53 +0000 Subject: [PATCH 06/11] Perl_newSVrv - after a sv_clear, do the upgrade to SVt_IV inline. Although this appears in an UNLIKELY branch, this commit shaves off some CPU instructions and may generally help the compiler to optimise the more likely branches. --- sv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sv.c b/sv.c index 0993ef13b0ed..b450c72288a6 100644 --- a/sv.c +++ b/sv.c @@ -11199,10 +11199,11 @@ Perl_newSVrv(pTHX_ SV *const rv, const char *const classname) const U32 refcnt = SvREFCNT(rv); SvREFCNT(rv) = 0; sv_clear(rv); - SvFLAGS(rv) = 0; + SvFLAGS(rv) = SVt_IV; SvREFCNT(rv) = refcnt; - sv_upgrade(rv, SVt_IV); + SET_SVANY_FOR_BODYLESS_IV(rv); + SvIV_set(rv, 0); } else if (SvROK(rv)) { SvREFCNT_dec(SvRV(rv)); } else { From 38bd8e86eee804ed2f0ae5835561e1b35f914a7d Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 22:28:33 +0000 Subject: [PATCH 07/11] Perl_newSVpvn_share - minor changes to help the compiler. This commit: * swaps `SvLEN_set(sv, 0)` for an assertion of this default value. * Moves `SvCUR_set` and the (now-combined) flag assignment before the call to `sharepvn`, giving the compiler a better chance to combine them with the initialization (likely) inlined from `newSV_type(SVt_PV)`. --- sv.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sv.c b/sv.c index b450c72288a6..d68eb8ccb614 100644 --- a/sv.c +++ b/sv.c @@ -10379,13 +10379,11 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) sv = newSV_type(SVt_PV); /* The logic for this is inlined in S_mro_get_linear_isa_dfs(), so if it changes here, update it there too. */ - SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash)); + SvFLAGS(sv) |= SVf_POK | SVp_POK | SVf_IsCOW | + (is_utf8 ? SVf_UTF8 : 0); SvCUR_set(sv, len); - SvLEN_set(sv, 0); - SvIsCOW_on(sv); - SvPOK_on(sv); - if (is_utf8) - SvUTF8_on(sv); + assert(SvLEN(sv) ==0); + SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash)); return sv; } From 131b418c14dd34ae008156ae25f52b98d38b5fa6 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 23:31:06 +0000 Subject: [PATCH 08/11] Perl_newSVhek - refactor to prioritize the overwhelming common case. Existing comments highlighted the common case, confirmed by a _gcov_ build and run of the test harness. For that workload: * `(!hek)` is vanishingly rare * `(flags & HVhek_NOTSHARED)` isn't hit by core at all. * `(HEK_LEN(hek) == HEf_SVKEY)` was about 1% of calls. The function was refactored in light of the existing comments and that _gcov_ data. This also cut the number of resulting CPU instructions by about half on a normal gcc build. --- sv.c | 77 +++++++++++++++++++++++++++--------------------------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/sv.c b/sv.c index d68eb8ccb614..dfb62f906900 100644 --- a/sv.c +++ b/sv.c @@ -10290,54 +10290,45 @@ SV if C is NULL. SV * Perl_newSVhek(pTHX_ const HEK *const hek) { - if (!hek) { - SV *sv; - - new_SV(sv); - return sv; - } - - if (HEK_LEN(hek) == HEf_SVKEY) { - return newSVsv(*(SV**)HEK_KEY(hek)); - } else { - const int flags = HEK_FLAGS(hek); - if (flags & HVhek_WASUTF8) { - /* Trouble :-) - Andreas would like keys he put in as utf8 to come back as utf8 - */ - STRLEN utf8_len = HEK_LEN(hek); - SV * const sv = newSV_type(SVt_PV); - char *as_utf8 = (char *)bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len); - /* bytes_to_utf8() allocates a new string, which we can repurpose: */ - sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); - SvUTF8_on (sv); - return sv; - } else if (flags & HVhek_NOTSHARED) { - /* A hash that isn't using shared hash keys has to have - the flag in every key so that we know not to try to call - share_hek_hek on it. */ + SV *sv = newSV_type(SVt_PV); - SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); - if (HEK_UTF8(hek)) + if (LIKELY(hek)) { + if (HEK_LEN(hek) != HEf_SVKEY) { + const int flags = HEK_FLAGS(hek); + if (LIKELY(!(flags & (HVhek_WASUTF8|HVhek_NOTSHARED)))) { + /* This will be overwhelmingly the most common case. */ + /* Inline most of newSVpvn_share(), because share_hek_hek() is far + more efficient than sharepvn(). */ + SvFLAGS(sv) = SVt_PV | SVf_POK | SVp_POK | SVf_IsCOW | + (HEK_UTF8(hek) ? SVf_UTF8 : 0); + SvCUR_set(sv, HEK_LEN(hek)); + SvPV_set(sv, (char *)HEK_KEY(share_hek_hek(hek))); + assert(SvLEN(sv) == 0); /* SVt_PV should be initialized with this value */ + return sv; + } else if (flags & HVhek_WASUTF8) { + /* Trouble :-) + Andreas would like keys he put in as utf8 to come back as utf8 + */ + STRLEN utf8_len = HEK_LEN(hek); + char *as_utf8 = (char *)bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len); + /* bytes_to_utf8() allocates a new string, which we can repurpose: */ + sv_usepvn_flags(sv, as_utf8, utf8_len, SV_HAS_TRAILING_NUL); SvUTF8_on (sv); - return sv; - } - /* This will be overwhelmingly the most common case. */ - { - /* Inline most of newSVpvn_share(), because share_hek_hek() is far - more efficient than sharepvn(). */ - SV *sv = newSV_type(SVt_PV); - - SvPV_set(sv, (char *)HEK_KEY(share_hek_hek(hek))); - SvCUR_set(sv, HEK_LEN(hek)); - SvLEN_set(sv, 0); - SvIsCOW_on(sv); - SvPOK_on(sv); - if (HEK_UTF8(hek)) - SvUTF8_on(sv); + return sv; + } else { + assert(flags & HVhek_NOTSHARED); + sv_setpvn_fresh(sv,HEK_KEY(hek),HEK_LEN(hek)); + if (HEK_UTF8(hek)) + SvUTF8_on (sv); + return sv; + } + } else { + /* (HEK_LEN(hek) == HEf_SVKEY) is comparatively more rare nowadays */ + sv_setsv_flags(sv, *(SV**)HEK_KEY(hek), SV_GMAGIC|SV_NOSTEAL); return sv; } } + return sv; } /* From 04630198724abb84c4f9aaaf60f2894ab9390245 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 23:34:40 +0000 Subject: [PATCH 09/11] S_sv_gets_append_to_utf8 - sv_gets upgrades tsv, so create SVt_PV directly. --- sv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv.c b/sv.c index dfb62f906900..598229c96dca 100644 --- a/sv.c +++ b/sv.c @@ -9147,7 +9147,7 @@ Perl_sv_collxfrm_flags(pTHX_ SV *const sv, STRLEN *const nxp, const I32 flags) static char * S_sv_gets_append_to_utf8(pTHX_ SV *const sv, PerlIO *const fp, SSize_t append) { - SV * const tsv = newSV_type(SVt_NULL); + SV * const tsv = newSV_type(SVt_PV); ENTER; SAVEFREESV(tsv); sv_gets(tsv, fp, 0); From 28cb41755fd1afb49531fd8ff36c6ca56aa3790b Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 23:36:29 +0000 Subject: [PATCH 10/11] Perl_newSVpvz - add some ASSUMEs to help the compiler --- sv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sv.c b/sv.c index 598229c96dca..774e864af428 100644 --- a/sv.c +++ b/sv.c @@ -6305,6 +6305,11 @@ Perl_newSVpvz(pTHX_ const STRLEN len) { SV *sv = newSV_type(SVt_PV); sv_grow_fresh(sv, len + 1); + + /* Some ASSUMEs which may help the compiler avoid unnecessary work */ + ASSUME(SvCUR(sv) == 0); + ASSUME(SvFLAGS(sv) == SVt_PV); + ASSUME(!TAINT_get); (void) sv_setpv_freshbuf(sv); return sv; From 8521334d5a302cb1b8e98b8afec8b6934813aca8 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Sun, 12 Oct 2025 23:39:19 +0000 Subject: [PATCH 11/11] Perl_sv_grow_fresh - set SvLEN before calling malloc The compiler stands a better chance of optimising the function this way around. (e.g. A gcc build reduces from 20 instructions down to 15.) --- sv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sv.c b/sv.c index 774e864af428..a60f056665d9 100644 --- a/sv.c +++ b/sv.c @@ -1500,12 +1500,13 @@ Perl_sv_grow_fresh(pTHX_ SV *const sv, STRLEN newlen) if (newlen < PERL_STRLEN_NEW_MIN) newlen = PERL_STRLEN_NEW_MIN; - s = (char*)safemalloc(newlen); - SvPV_set(sv, s); - /* No PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC here, since many strings */ /* will never be grown once set. Let the real sv_grow worry about that. */ SvLEN_set(sv, newlen); + + s = (char*)safemalloc(newlen); + SvPV_set(sv, s); + return s; }