From be37467155a3b2617a5f8e69e0b51a60ccff7e4d Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 11 Jun 2025 22:11:18 +0000 Subject: [PATCH 1/5] [msan] Automatically print shadow for failing outlined checks A commonly used aid for debugging MSan reports is __msan_print_shadow(), which must be manually applied (typically to the variable in the UUM report or nearby). This is in contrast to ASan, which automatically prints out the shadow map when a check fails. This patch changes MSan to print the shadow that failed an outlined check (checks are outlined per function after -msan-instrumentation-with-call-threshold is exceeded). As a side effect, this also enables outlined checks for arbitrary-sized shadows (vs. the current hardcoded handlers for {1,2,4,8}-byte shadows). Note that we do not print out the shadow map of "neighboring" variables because this is technically infeasible; see "Caveat" below. This patch can be easier to use than __msan_print_shadow() because this does not require manual app code annotations. Additionally, due to optimizations, __msan_print_shadow() calls can sometimes spuriously affect whether a variable is initialized. Caveat: the shadow does not necessarily correspond to an individual user variable (MSan instrumentation may combine and/or truncate multiple shadows prior to emitting a check that the mangled shadow is zero; e.g., convertShadowToScalar, handleSSEVectorConvertIntrinsic, materializeInstructionChecks). OTOH it is arguably a strength that this feature emit the shadow that directly matters for the MSan check, but which cannot be obtained using the MSan API. --- compiler-rt/lib/msan/msan.cpp | 45 +++++++++++++++++++ .../lib/msan/msan_interface_internal.h | 2 + .../msan_print_shadow_on_outlined_check.cpp | 39 ++++++++++++++++ .../Instrumentation/MemorySanitizer.cpp | 42 ++++++++++++----- 4 files changed, 118 insertions(+), 10 deletions(-) create mode 100644 compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index a3c0c2e485af3..d82881235340b 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -352,11 +352,33 @@ void __sanitizer::BufferedStackTrace::UnwindImpl( using namespace __msan; +// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into +// an MSan shadow region. +static void print_shadow_value(void *shadow, u64 size) { + Printf("\n"); + Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s"); + for (unsigned int i = 0; i < size; i++) { + if (i % 4 == 0) + Printf(" "); + + unsigned char x = ((unsigned char *)shadow)[i]; + Printf("%x%x", x >> 4, x & 0xf); + } + Printf("\n"); + Printf( + "Caveat: the shadow value does not necessarily directly correspond to a " + "single user variable. The correspondence is stronger, but not always " + "perfect, when origin tracking is enabled.\n"); + Printf("\n"); +} + #define MSAN_MAYBE_WARNING(type, size) \ void __msan_maybe_warning_##size(type s, u32 o) { \ GET_CALLER_PC_BP; \ + \ if (UNLIKELY(s)) { \ PrintWarningWithOrigin(pc, bp, o); \ + print_shadow_value((void *)(&s), sizeof(s)); \ if (__msan::flags()->halt_on_error) { \ Printf("Exiting\n"); \ Die(); \ @@ -369,6 +391,29 @@ MSAN_MAYBE_WARNING(u16, 2) MSAN_MAYBE_WARNING(u32, 4) MSAN_MAYBE_WARNING(u64, 8) +// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into +// an MSan shadow region. +void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) { + GET_CALLER_PC_BP; + + bool allZero = true; + for (unsigned int i = 0; i < size; i++) { + if (((char *)shadow)[i]) { + allZero = false; + break; + } + } + + if (UNLIKELY(!allZero)) { + PrintWarningWithOrigin(pc, bp, o); + print_shadow_value(shadow, size); + if (__msan::flags()->halt_on_error) { + Printf("Exiting\n"); + Die(); + } + } +} + #define MSAN_MAYBE_STORE_ORIGIN(type, size) \ void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \ if (UNLIKELY(s)) { \ diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index c2eead13c20cf..75425b98166a9 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -60,6 +60,8 @@ SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_warning_4(u32 s, u32 o); SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_warning_8(u64 s, u32 o); +SANITIZER_INTERFACE_ATTRIBUTE +void __msan_maybe_warning_N(void *shadow, u64 size, u32 o); SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_store_origin_1(u8 s, void *p, u32 o); diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp new file mode 100644 index 0000000000000..a087c1d8a9053 --- /dev/null +++ b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp @@ -0,0 +1,39 @@ +// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \ +// RUN: && not %run %t 2>&1 | FileCheck %s + +#include +#include + +#include + +int main(int argc, char *argv[]) { + long double a; + printf("a: %Lf\n", a); + // CHECK: Shadow value (16 bytes): ffffffff ffffffff ffff0000 00000000 + + unsigned long long b; + printf("b: %llu\n", b); + // CHECK: Shadow value (8 bytes): ffffffff ffffffff + + char *p = (char *)(&b); + p[2] = 36; + printf("b: %lld\n", b); + // CHECK: Shadow value (8 bytes): ffff00ff ffffffff + + b = b << 8; + printf("b: %lld\n", b); + __msan_print_shadow(&b, sizeof(b)); + // CHECK: Shadow value (8 bytes): 00ffff00 ffffffff + + unsigned int c; + printf("c: %u\n", c); + // CHECK: Shadow value (4 bytes): ffffffff + + // Converted to boolean + if (c) { + // CHECK: Shadow value (1 byte): 01 + printf("Hello\n"); + } + + return 0; +} diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c2315d5de7041..1fbeebc49e149 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -652,6 +652,7 @@ class MemorySanitizer { // These arrays are indexed by log2(AccessSize). FunctionCallee MaybeWarningFn[kNumberOfAccessSizes]; + FunctionCallee MaybeWarningVarSizeFn; FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes]; /// Run-time helper that generates a new origin value for a stack @@ -926,7 +927,9 @@ void MemorySanitizer::createUserspaceApi(Module &M, MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false), IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty()); - + MaybeWarningVarSizeFn = M.getOrInsertFunction( + "__msan_maybe_warning_N", TLI.getAttrList(C, {}, /*Signed=*/false), + IRB.getVoidTy(), PtrTy, IRB.getInt64Ty(), IRB.getInt32Ty()); FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false), @@ -1233,7 +1236,6 @@ struct MemorySanitizerVisitor : public InstVisitor { // Constants likely will be eliminated by follow-up passes. if (isa(V)) return false; - ++SplittableBlocksCount; return ClInstrumentationWithCallThreshold >= 0 && SplittableBlocksCount > ClInstrumentationWithCallThreshold; @@ -1432,18 +1434,38 @@ struct MemorySanitizerVisitor : public InstVisitor { const DataLayout &DL = F.getDataLayout(); TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType()); unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); - if (instrumentWithCalls(ConvertedShadow) && - SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) { - FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex]; + if (instrumentWithCalls(ConvertedShadow) && !MS.CompileKernel) { // ZExt cannot convert between vector and scalar ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB); Value *ConvertedShadow2 = IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); - CallBase *CB = IRB.CreateCall( - Fn, {ConvertedShadow2, - MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)}); - CB->addParamAttr(0, Attribute::ZExt); - CB->addParamAttr(1, Attribute::ZExt); + + if (SizeIndex < kNumberOfAccessSizes) { + FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex]; + CallBase *CB = IRB.CreateCall( + Fn, + {ConvertedShadow2, + MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)}); + CB->addParamAttr(0, Attribute::ZExt); + CB->addParamAttr(1, Attribute::ZExt); + } else { + FunctionCallee Fn = MS.MaybeWarningVarSizeFn; + + // Note: we can only dump the current shadow value, not an entire + // neighborhood shadow map (as ASan does). This is because the shadow + // value does not necessarily correspond to a user variable: MSan code + // often combines shadows (e.g., convertShadowToScalar, + // handleSSEVectorConvertIntrinsic, materializeInstructionChecks). + Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u); + IRB.CreateStore(ConvertedShadow2, ShadowAlloca); + unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType()); + CallBase *CB = IRB.CreateCall( + Fn, + {ShadowAlloca, ConstantInt::get(IRB.getInt64Ty(), ShadowSize), + MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)}); + CB->addParamAttr(1, Attribute::ZExt); + CB->addParamAttr(2, Attribute::ZExt); + } } else { Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( From a856b74110679d50c3d0d91f2a1278b78b7146cf Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 20 Jun 2025 23:01:41 +0000 Subject: [PATCH 2/5] Require verbosity >= 1 --- compiler-rt/lib/msan/msan.cpp | 28 ++++++++++--------- .../msan_print_shadow_on_outlined_check.cpp | 2 +- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index d82881235340b..6f58dfad32bbb 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -372,18 +372,19 @@ static void print_shadow_value(void *shadow, u64 size) { Printf("\n"); } -#define MSAN_MAYBE_WARNING(type, size) \ - void __msan_maybe_warning_##size(type s, u32 o) { \ - GET_CALLER_PC_BP; \ - \ - if (UNLIKELY(s)) { \ - PrintWarningWithOrigin(pc, bp, o); \ - print_shadow_value((void *)(&s), sizeof(s)); \ - if (__msan::flags()->halt_on_error) { \ - Printf("Exiting\n"); \ - Die(); \ - } \ - } \ +#define MSAN_MAYBE_WARNING(type, size) \ + void __msan_maybe_warning_##size(type s, u32 o) { \ + GET_CALLER_PC_BP; \ + \ + if (UNLIKELY(s)) { \ + PrintWarningWithOrigin(pc, bp, o); \ + if (Verbosity() >= 1) \ + print_shadow_value((void *)(&s), sizeof(s)); \ + if (__msan::flags()->halt_on_error) { \ + Printf("Exiting\n"); \ + Die(); \ + } \ + } \ } MSAN_MAYBE_WARNING(u8, 1) @@ -406,7 +407,8 @@ void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) { if (UNLIKELY(!allZero)) { PrintWarningWithOrigin(pc, bp, o); - print_shadow_value(shadow, size); + if (Verbosity() >= 1) + print_shadow_value(shadow, size); if (__msan::flags()->halt_on_error) { Printf("Exiting\n"); Die(); diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp index a087c1d8a9053..3ecb1277f23cc 100644 --- a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp +++ b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \ -// RUN: && not %run %t 2>&1 | FileCheck %s +// RUN: && not env MSAN_OPTIONS=verbosity=1 %run %t 2>&1 | FileCheck %s #include #include From 338bccd3d82a344f00d8e05ac79baf66b99c41af Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 20 Jun 2025 23:11:21 +0000 Subject: [PATCH 3/5] Move before PrintWarningWithOrigin --- compiler-rt/lib/msan/msan.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 6f58dfad32bbb..67879e37fad5e 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -355,7 +355,6 @@ using namespace __msan; // N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into // an MSan shadow region. static void print_shadow_value(void *shadow, u64 size) { - Printf("\n"); Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s"); for (unsigned int i = 0; i < size; i++) { if (i % 4 == 0) @@ -377,9 +376,9 @@ static void print_shadow_value(void *shadow, u64 size) { GET_CALLER_PC_BP; \ \ if (UNLIKELY(s)) { \ - PrintWarningWithOrigin(pc, bp, o); \ if (Verbosity() >= 1) \ print_shadow_value((void *)(&s), sizeof(s)); \ + PrintWarningWithOrigin(pc, bp, o); \ if (__msan::flags()->halt_on_error) { \ Printf("Exiting\n"); \ Die(); \ @@ -406,9 +405,9 @@ void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) { } if (UNLIKELY(!allZero)) { - PrintWarningWithOrigin(pc, bp, o); if (Verbosity() >= 1) print_shadow_value(shadow, size); + PrintWarningWithOrigin(pc, bp, o); if (__msan::flags()->halt_on_error) { Printf("Exiting\n"); Die(); From 789ec92b2168ecf9e5514f2c49a069f0126eeee5 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 20 Jun 2025 23:38:24 +0000 Subject: [PATCH 4/5] Remove note --- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 1fbeebc49e149..4e4be4c6dd47e 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1450,12 +1450,6 @@ struct MemorySanitizerVisitor : public InstVisitor { CB->addParamAttr(1, Attribute::ZExt); } else { FunctionCallee Fn = MS.MaybeWarningVarSizeFn; - - // Note: we can only dump the current shadow value, not an entire - // neighborhood shadow map (as ASan does). This is because the shadow - // value does not necessarily correspond to a user variable: MSan code - // often combines shadows (e.g., convertShadowToScalar, - // handleSSEVectorConvertIntrinsic, materializeInstructionChecks). Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u); IRB.CreateStore(ConvertedShadow2, ShadowAlloca); unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType()); From 345ae9f4182d59977249d0b95c0d67960359eb84 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 20 Jun 2025 23:38:42 +0000 Subject: [PATCH 5/5] Update llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll --- .../MemorySanitizer/with-call-type-size.ll | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll b/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll index 0b81e5682062a..3b1ab3364b310 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll @@ -73,13 +73,30 @@ define <4 x i32> @test64(<4 x i32> %vec, i64 %idx, i32 %x) sanitize_memory { ; CHECK: call void @__msan_maybe_warning_8(i64 zeroext %{{.*}}, i32 zeroext 0) ; CHECK: ret <4 x i32> -; Type size too large => inline check. +; Type size too large => use variable-size handler. define <4 x i32> @test65(<4 x i32> %vec, i65 %idx, i32 %x) sanitize_memory { %vec1 = insertelement <4 x i32> %vec, i32 %x, i65 %idx ret <4 x i32> %vec1 } ; CHECK-LABEL: @test65( -; CHECK: call void @__msan_warning_noreturn +; CHECK: %[[A:.*]] = zext i65 %1 to i128 +; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0) +; CHECK: ret <4 x i32> + +define <4 x i32> @test128(<4 x i32> %vec, i128 %idx, i32 %x) sanitize_memory { + %vec1 = insertelement <4 x i32> %vec, i32 %x, i128 %idx + ret <4 x i32> %vec1 +} +; CHECK-LABEL: @test128( +; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0) +; CHECK: ret <4 x i32> + +define <4 x i32> @test256(<4 x i32> %vec, i256 %idx, i32 %x) sanitize_memory { + %vec1 = insertelement <4 x i32> %vec, i32 %x, i256 %idx + ret <4 x i32> %vec1 +} +; CHECK-LABEL: @test256( +; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 32, i32 zeroext 0) ; CHECK: ret <4 x i32> define <4 x i32> @testUndef(<4 x i32> %vec, i32 %x) sanitize_memory {