Skip to content

Reapply [Analyzer][CFG] Correctly handle rebuilt default arg and default init expression #146281

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,9 @@ Static Analyzer
---------------
- Fixed a crash when C++20 parenthesized initializer lists are used. This issue
was causing a crash in clang-tidy. (#GH136041)
- Clang currently support extending lifetime of object bound to
reference members of aggregates in CFG and ExprEngine, that are
created from default member initializer.

New features
^^^^^^^^^^^^
Expand Down
31 changes: 31 additions & 0 deletions clang/include/clang/AST/ParentMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#ifndef LLVM_CLANG_AST_PARENTMAP_H
#define LLVM_CLANG_AST_PARENTMAP_H

#include "llvm/Support/Casting.h"

namespace clang {
class Stmt;
class Expr;
Expand All @@ -39,6 +41,25 @@ class ParentMap {
Stmt *getParentIgnoreParenImpCasts(Stmt *) const;
Stmt *getOuterParenParent(Stmt *) const;

template <typename... Ts> Stmt *getOuterMostAncestor(Stmt *S) const {
Stmt *Res = nullptr;
while (S) {
if (llvm::isa<Ts...>(S))
Res = S;
S = getParent(S);
}
return Res;
}

template <typename... Ts> Stmt *getInnerMostAncestor(Stmt *S) const {
while (S) {
if (llvm::isa<Ts...>(S))
return S;
S = getParent(S);
}
return nullptr;
}

const Stmt *getParent(const Stmt* S) const {
return getParent(const_cast<Stmt*>(S));
}
Expand All @@ -51,6 +72,16 @@ class ParentMap {
return getParentIgnoreParenCasts(const_cast<Stmt*>(S));
}

template <typename... Ts>
const Stmt *getOuterMostAncestor(const Stmt *S) const {
return getOuterMostAncestor<Ts...>(const_cast<Stmt *>(S));
}

template <typename... Ts>
const Stmt *getInnerMostAncestor(const Stmt *S) const {
return getInnerMostAncestor<Ts...>(const_cast<Stmt *>(S));
}

bool hasParent(const Stmt *S) const { return getParent(S) != nullptr; }

bool isConsumedExpr(Expr *E) const;
Expand Down
17 changes: 17 additions & 0 deletions clang/lib/AST/ParentMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "clang/AST/ParentMap.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/StmtObjC.h"
#include "llvm/ADT/DenseMap.h"

Expand Down Expand Up @@ -103,6 +104,22 @@ static void BuildParentMap(MapTy& M, Stmt* S,
BuildParentMap(M, SubStmt, OVMode);
}
break;
case Stmt::CXXDefaultArgExprClass:
if (auto *Arg = dyn_cast<CXXDefaultArgExpr>(S)) {
if (Arg->hasRewrittenInit()) {
M[Arg->getExpr()] = S;
BuildParentMap(M, Arg->getExpr(), OVMode);
}
}
break;
case Stmt::CXXDefaultInitExprClass:
if (auto *Init = dyn_cast<CXXDefaultInitExpr>(S)) {
if (Init->hasRewrittenInit()) {
M[Init->getExpr()] = S;
BuildParentMap(M, Init->getExpr(), OVMode);
}
}
break;
default:
for (Stmt *SubStmt : S->children()) {
if (SubStmt) {
Expand Down
54 changes: 45 additions & 9 deletions clang/lib/Analysis/CFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,10 @@ class CFGBuilder {

private:
// Visitors to walk an AST and construct the CFG.
CFGBlock *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Default,
AddStmtChoice asc);
CFGBlock *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Default,
AddStmtChoice asc);
CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc);
CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc);
CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc);
Expand Down Expand Up @@ -2345,16 +2349,10 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
asc, ExternallyDestructed);

case Stmt::CXXDefaultArgExprClass:
return VisitCXXDefaultArgExpr(cast<CXXDefaultArgExpr>(S), asc);

case Stmt::CXXDefaultInitExprClass:
// FIXME: The expression inside a CXXDefaultArgExpr is owned by the
// called function's declaration, not by the caller. If we simply add
// this expression to the CFG, we could end up with the same Expr
// appearing multiple times (PR13385).
//
// It's likewise possible for multiple CXXDefaultInitExprs for the same
// expression to be used in the same function (through aggregate
// initialization).
return VisitStmt(S, asc);
return VisitCXXDefaultInitExpr(cast<CXXDefaultInitExpr>(S), asc);

case Stmt::CXXBindTemporaryExprClass:
return VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), asc);
Expand Down Expand Up @@ -2524,6 +2522,44 @@ CFGBlock *CFGBuilder::VisitChildren(Stmt *S) {
return B;
}

CFGBlock *CFGBuilder::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *Arg,
AddStmtChoice asc) {
if (Arg->hasRewrittenInit()) {
if (asc.alwaysAdd(*this, Arg)) {
autoCreateBlock();
appendStmt(Block, Arg);
}
return VisitStmt(Arg->getExpr()->IgnoreParens(), asc);
}

// We can't add the default argument if it's not rewritten because the
// expression inside a CXXDefaultArgExpr is owned by the called function's
// declaration, not by the caller, we could end up with the same expression
// appearing multiple times.
return VisitStmt(Arg, asc);
}

CFGBlock *CFGBuilder::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *Init,
AddStmtChoice asc) {
if (Init->hasRewrittenInit()) {
if (asc.alwaysAdd(*this, Init)) {
autoCreateBlock();
appendStmt(Block, Init);
}

// Unlike CXXDefaultArgExpr::getExpr stripped off the top level FullExpr and
// ConstantExpr, CXXDefaultInitExpr::getExpr does not do this, so we don't
// need to ignore ParenExprs, because the top level will not be a ParenExpr.
return VisitStmt(Init->getExpr(), asc);
}

// We can't add the default initializer if it's not rewritten because multiple
// CXXDefaultInitExprs for the same sub-expression to be used in the same
// function (through aggregate initialization). we could end up with the same
// expression appearing multiple times.
return VisitStmt(Init, asc);
}

CFGBlock *CFGBuilder::VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc) {
if (asc.alwaysAdd(*this, ILE)) {
autoCreateBlock();
Expand Down
68 changes: 18 additions & 50 deletions clang/lib/Analysis/ReachableCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/BitVector.h"
#include <cstddef>
#include <optional>

using namespace clang;
Expand Down Expand Up @@ -396,17 +397,18 @@ namespace {
SmallVector<const CFGBlock *, 10> WorkList;
Preprocessor &PP;
ASTContext &C;
AnalysisDeclContext &AC;

typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12>
DeferredLocsTy;

DeferredLocsTy DeferredLocs;

public:
DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP, ASTContext &C)
: Visited(reachable.size()),
Reachable(reachable),
PP(PP), C(C) {}
DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP,
AnalysisDeclContext &AC)
: Visited(reachable.size()), Reachable(reachable), PP(PP),
C(AC.getASTContext()), AC(AC) {}

void enqueue(const CFGBlock *block);
unsigned scanBackwards(const CFGBlock *Start,
Expand Down Expand Up @@ -453,70 +455,36 @@ bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) {
return isDeadRoot;
}

// Check if the given `DeadStmt` is a coroutine statement and is a substmt of
// the coroutine statement. `Block` is the CFGBlock containing the `DeadStmt`.
static bool isInCoroutineStmt(const Stmt *DeadStmt, const CFGBlock *Block) {
// The coroutine statement, co_return, co_await, or co_yield.
const Stmt *CoroStmt = nullptr;
// Find the first coroutine statement after the DeadStmt in the block.
bool AfterDeadStmt = false;
for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I != E;
++I)
if (std::optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
const Stmt *S = CS->getStmt();
if (S == DeadStmt)
AfterDeadStmt = true;
if (AfterDeadStmt &&
// For simplicity, we only check simple coroutine statements.
(llvm::isa<CoreturnStmt>(S) || llvm::isa<CoroutineSuspendExpr>(S))) {
CoroStmt = S;
break;
}
}
if (!CoroStmt)
return false;
struct Checker : DynamicRecursiveASTVisitor {
const Stmt *DeadStmt;
bool CoroutineSubStmt = false;
Checker(const Stmt *S) : DeadStmt(S) {
// Statements captured in the CFG can be implicit.
ShouldVisitImplicitCode = true;
}

bool VisitStmt(Stmt *S) override {
if (S == DeadStmt)
CoroutineSubStmt = true;
return true;
}
};
Checker checker(DeadStmt);
checker.TraverseStmt(const_cast<Stmt *>(CoroStmt));
return checker.CoroutineSubStmt;
}

static bool isValidDeadStmt(const Stmt *S, const clang::CFGBlock *Block) {
static bool isValidDeadStmt(ParentMap &PM, const Stmt *S,
const clang::CFGBlock *Block) {
if (S->getBeginLoc().isInvalid())
return false;
if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S))
return BO->getOpcode() != BO_Comma;
// Coroutine statements are never considered dead statements, because removing
// them may change the function semantic if it is the only coroutine statement
// of the coroutine.
return !isInCoroutineStmt(S, Block);
return !PM.getInnerMostAncestor<CoreturnStmt, CoroutineSuspendExpr>(S);
}

const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
auto &PM = AC.getParentMap();

for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I)
if (std::optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
const Stmt *S = CS->getStmt();
if (isValidDeadStmt(S, Block))
auto *RewrittenParent =
PM.getOuterMostAncestor<CXXDefaultArgExpr, CXXDefaultInitExpr>(S);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I try to fix Nico's example(#128195) about the wrong unrachable code location. The root cause of this issue is that this PR introduce "rewritten" expressions in CFGBlock. When we are finding the real dead code, we should try to ignore the "rewritten" sub-expression nodes in CXXDefaultInitExpr/CXXDefaultArgExpr, but I'm not sure this option is make sense. What do you think?

if (RewrittenParent)
S = RewrittenParent;
if (isValidDeadStmt(AC.getParentMap(), S, Block))
return S;
}

CFGTerminator T = Block->getTerminator();
if (T.isStmtBranch()) {
const Stmt *S = T.getStmt();
if (S && isValidDeadStmt(S, Block))
if (S && isValidDeadStmt(AC.getParentMap(), S, Block))
return S;
}

Expand Down Expand Up @@ -762,7 +730,7 @@ void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP,
if (reachable[block->getBlockID()])
continue;

DeadCodeScan DS(reachable, PP, AC.getASTContext());
DeadCodeScan DS(reachable, PP, AC);
numReachable += DS.scanBackwards(block, CB);

if (numReachable == cfg->getNumBlockIDs())
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5623,8 +5623,10 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
/*SkipImmediateInvocations=*/NestedDefaultChecking))
return ExprError();

Expr *RewrittenExpr = (Init == Param->getDefaultArg() ? nullptr : Init);
return CXXDefaultArgExpr::Create(Context, InitializationContext->Loc, Param,
Init, InitializationContext->Context);
RewrittenExpr,
InitializationContext->Context);
}

static FieldDecl *FindFieldDeclInstantiationPattern(const ASTContext &Ctx,
Expand Down Expand Up @@ -5742,10 +5744,11 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
return ExprError();
}
Init = Res.get();

Expr *RewrittenInit =
(Init == Field->getInClassInitializer() ? nullptr : Init);
return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc,
Field, InitializationContext->Context,
Init);
RewrittenInit);
}

// DR1351:
Expand Down
56 changes: 34 additions & 22 deletions clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1995,33 +1995,45 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);

const Expr *ArgE;
if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
bool HasRebuiltInit = false;
const Expr *ArgE = nullptr;
if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S)) {
ArgE = DefE->getExpr();
else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
HasRebuiltInit = DefE->hasRewrittenInit();
} else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S)) {
ArgE = DefE->getExpr();
else
HasRebuiltInit = DefE->hasRewrittenInit();
} else
llvm_unreachable("unknown constant wrapper kind");

bool IsTemporary = false;
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
ArgE = MTE->getSubExpr();
IsTemporary = true;
}
if (HasRebuiltInit) {
for (auto *N : PreVisit) {
ProgramStateRef state = N->getState();
const LocationContext *LCtx = N->getLocationContext();
state = state->BindExpr(S, LCtx, state->getSVal(ArgE, LCtx));
Bldr2.generateNode(S, N, state);
}
} else {
// If it's not rewritten, the contents of these expressions are not
// actually part of the current function, so we fall back to constant
// evaluation.
bool IsTemporary = false;
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
ArgE = MTE->getSubExpr();
IsTemporary = true;
}

std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
const LocationContext *LCtx = Pred->getLocationContext();
for (auto *I : PreVisit) {
ProgramStateRef State = I->getState();
State = State->BindExpr(S, LCtx, ConstantVal.value_or(UnknownVal()));
if (IsTemporary)
State = createTemporaryRegionIfNeeded(State, LCtx, cast<Expr>(S),
cast<Expr>(S));

std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
if (!ConstantVal)
ConstantVal = UnknownVal();

const LocationContext *LCtx = Pred->getLocationContext();
for (const auto I : PreVisit) {
ProgramStateRef State = I->getState();
State = State->BindExpr(S, LCtx, *ConstantVal);
if (IsTemporary)
State = createTemporaryRegionIfNeeded(State, LCtx,
cast<Expr>(S),
cast<Expr>(S));
Bldr2.generateNode(S, I, State);
Bldr2.generateNode(S, I, State);
}
}

getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/AST/ast-dump-recovery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ union U {
// CHECK-NEXT: `-DeclStmt {{.*}}
// CHECK-NEXT: `-VarDecl {{.*}} g 'U':'GH112560::U' listinit
// CHECK-NEXT: `-InitListExpr {{.*}} 'U':'GH112560::U' contains-errors field Field {{.*}} 'f' 'int'
// CHECK-NEXT: `-CXXDefaultInitExpr {{.*}} 'int' contains-errors has rewritten init
// CHECK-NEXT: `-CXXDefaultInitExpr {{.*}} 'int' contains-errors
// CHECK-NEXT: `-RecoveryExpr {{.*}} 'int' contains-errors
// DISABLED-NOT: -RecoveryExpr {{.*}} contains-errors
void foo() {
Expand Down
Loading
Loading