Skip to content

[analyzer] Enforce not making overly complicated symbols #144327

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class SValBuilder {
protected:
ASTContext &Context;

const AnalyzerOptions &AnOpts;

/// Manager of APSInt values.
BasicValueFactory BasicVals;

Expand All @@ -68,8 +70,6 @@ class SValBuilder {

ProgramStateManager &StateMgr;

const AnalyzerOptions &AnOpts;

/// The scalar type to use for array indices.
const QualType ArrayIndexTy;

Expand Down Expand Up @@ -326,8 +326,8 @@ class SValBuilder {
nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op,
const SymExpr *rhs, QualType type);

NonLoc makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
QualType type);
nonloc::SymbolVal makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
QualType type);

/// Create a NonLoc value for cast.
nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy,
Expand Down
19 changes: 10 additions & 9 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,18 @@ class SymExpr : public llvm::FoldingSetNode {
/// Note, however, that it can't be used in Profile because SymbolManager
/// needs to compute Profile before allocating SymExpr.
const SymbolID Sym;
const unsigned Complexity;

protected:
SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}
SymExpr(Kind k, SymbolID Sym, unsigned Complexity)
: K(k), Sym(Sym), Complexity(Complexity) {}

static bool isValidTypeForSymbol(QualType T) {
// FIXME: Depending on whether we choose to deprecate structural symbols,
// this may become much stricter.
return !T.isNull() && !T->isVoidType();
}

mutable unsigned Complexity = 0;

public:
virtual ~SymExpr() = default;

Expand Down Expand Up @@ -108,7 +108,7 @@ class SymExpr : public llvm::FoldingSetNode {
return llvm::make_range(symbol_iterator(this), symbol_iterator());
}

virtual unsigned computeComplexity() const = 0;
unsigned complexity() const { return Complexity; }

/// Find the region from which this symbol originates.
///
Expand Down Expand Up @@ -136,21 +136,22 @@ using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
/// A symbol representing data which can be stored in a memory location
/// (region).
class SymbolData : public SymExpr {
friend class SymbolManager;
void anchor() override;

protected:
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym, computeComplexity()) {
assert(classof(this));
}

static unsigned computeComplexity(...) { return 1; }

public:
~SymbolData() override = default;

/// Get a string representation of the kind of the region.
virtual StringRef getKindStr() const = 0;

unsigned computeComplexity() const override {
return 1;
};

// Implement isa<T> support.
static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
static constexpr bool classof(Kind K) {
Expand Down
127 changes: 92 additions & 35 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H

#include "clang/AST/Expr.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Basic/LLVM.h"
#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
Expand All @@ -26,9 +28,11 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <type_traits>

namespace clang {

Expand Down Expand Up @@ -133,6 +137,47 @@ class SymbolConjured : public SymbolData {
static constexpr bool classof(Kind K) { return K == ClassKind; }
};

/// A symbol representing the result of an expression that became too
/// complicated. In other words, its complexity surpassed the
/// MaxSymbolComplexity threshold.
/// TODO: When the MaxSymbolComplexity is reached, we should propagate the taint
/// info to it.
class SymbolOverlyComplex final : public SymbolData {
const SymExpr *OverlyComplicatedSymbol;

friend class SymExprAllocator;

SymbolOverlyComplex(SymbolID Sym, const SymExpr *OverlyComplicatedSymbol)
: SymbolData(ClassKind, Sym),
OverlyComplicatedSymbol(OverlyComplicatedSymbol) {
assert(OverlyComplicatedSymbol);
}

public:
QualType getType() const override {
return OverlyComplicatedSymbol->getType();
}

StringRef getKindStr() const override;

void dumpToStream(raw_ostream &os) const override;

static void Profile(llvm::FoldingSetNodeID &profile,
const SymExpr *OverlyComplicatedSymbol) {
profile.AddInteger((unsigned)ClassKind);
profile.AddPointer(OverlyComplicatedSymbol);
}

void Profile(llvm::FoldingSetNodeID &profile) override {
Profile(profile, OverlyComplicatedSymbol);
}

// Implement isa<T> support.
static constexpr Kind ClassKind = SymbolOverlyComplexKind;
static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
static constexpr bool classof(Kind K) { return K == ClassKind; }
};

/// A symbol representing the value of a MemRegion whose parent region has
/// symbolic value.
class SymbolDerived : public SymbolData {
Expand Down Expand Up @@ -285,6 +330,7 @@ class SymbolMetadata : public SymbolData {

/// Represents a cast expression.
class SymbolCast : public SymExpr {
friend class SymbolManager;
const SymExpr *Operand;

/// Type of the operand.
Expand All @@ -295,20 +341,19 @@ class SymbolCast : public SymExpr {

friend class SymExprAllocator;
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
: SymExpr(ClassKind, Sym), Operand(In), FromTy(From), ToTy(To) {
: SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In),
FromTy(From), ToTy(To) {
assert(In);
assert(isValidTypeForSymbol(From));
// FIXME: GenericTaintChecker creates symbols of void type.
// Otherwise, 'To' should also be a valid type.
}

public:
unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity = 1 + Operand->computeComplexity();
return Complexity;
static unsigned computeComplexity(const SymExpr *In, QualType, QualType) {
return In->complexity() + 1;
}

public:
QualType getType() const override { return ToTy; }

LLVM_ATTRIBUTE_RETURNS_NONNULL
Expand Down Expand Up @@ -336,14 +381,16 @@ class SymbolCast : public SymExpr {

/// Represents a symbolic expression involving a unary operator.
class UnarySymExpr : public SymExpr {
friend class SymbolManager;
const SymExpr *Operand;
UnaryOperator::Opcode Op;
QualType T;

friend class SymExprAllocator;
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
QualType T)
: SymExpr(ClassKind, Sym), Operand(In), Op(Op), T(T) {
: SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In),
Op(Op), T(T) {
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
// modeled as x + 1.
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
Expand All @@ -354,13 +401,12 @@ class UnarySymExpr : public SymExpr {
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
}

public:
unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity = 1 + Operand->computeComplexity();
return Complexity;
static unsigned computeComplexity(const SymExpr *In, UnaryOperator::Opcode,
QualType) {
return In->complexity() + 1;
}

public:
const SymExpr *getOperand() const { return Operand; }
UnaryOperator::Opcode getOpcode() const { return Op; }
QualType getType() const override { return T; }
Expand Down Expand Up @@ -391,8 +437,9 @@ class BinarySymExpr : public SymExpr {
QualType T;

protected:
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
: SymExpr(k, Sym), Op(op), T(t) {
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t,
unsigned Complexity)
: SymExpr(k, Sym, Complexity), Op(op), T(t) {
assert(classof(this));
// Binary expressions are results of arithmetic. Pointer arithmetic is not
// handled by binary expressions, but it is instead handled by applying
Expand All @@ -415,7 +462,7 @@ class BinarySymExpr : public SymExpr {

protected:
static unsigned computeOperandComplexity(const SymExpr *Value) {
return Value->computeComplexity();
return Value->complexity();
}
static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
return 1;
Expand All @@ -432,17 +479,26 @@ class BinarySymExpr : public SymExpr {
/// Template implementation for all binary symbolic expressions
template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassK>
class BinarySymExprImpl : public BinarySymExpr {
friend class SymbolManager;
LHSTYPE LHS;
RHSTYPE RHS;

friend class SymExprAllocator;
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
RHSTYPE rhs, QualType t)
: BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
: BinarySymExpr(Sym, ClassKind, op, t,
computeComplexity(lhs, op, rhs, t)),
LHS(lhs), RHS(rhs) {
assert(getPointer(lhs));
assert(getPointer(rhs));
}

static unsigned computeComplexity(LHSTYPE lhs, BinaryOperator::Opcode,
RHSTYPE rhs, QualType) {
// FIXME: Should we add 1 to complexity?
return computeOperandComplexity(lhs) + computeOperandComplexity(rhs);
}

public:
void dumpToStream(raw_ostream &os) const override {
dumpToStreamImpl(os, LHS);
Expand All @@ -453,13 +509,6 @@ class BinarySymExprImpl : public BinarySymExpr {
LHSTYPE getLHS() const { return LHS; }
RHSTYPE getRHS() const { return RHS; }

unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity =
computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
return Complexity;
}

static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
ID.AddInteger((unsigned)ClassKind);
Expand Down Expand Up @@ -520,27 +569,30 @@ class SymbolManager {
SymExprAllocator Alloc;
BasicValueFactory &BV;
ASTContext &Ctx;
const unsigned MaxCompComplexity;

public:
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
llvm::BumpPtrAllocator &bpalloc)
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}
llvm::BumpPtrAllocator &bpalloc, const AnalyzerOptions &Opts)
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx),
MaxCompComplexity(Opts.MaxSymbolComplexity) {
assert(MaxCompComplexity > 0 && "Zero max complexity doesn't make sense");
}

static bool canSymbolicate(QualType T);

/// Create or retrieve a SymExpr of type \p SymExprT for the given arguments.
/// Create or retrieve a SymExpr of type \p T for the given arguments.
/// Use the arguments to check for an existing SymExpr and return it,
/// otherwise, create a new one and keep a pointer to it to avoid duplicates.
template <typename SymExprT, typename... Args>
const SymExprT *acquire(Args &&...args);
template <typename T, typename... Args,
typename Ret = std::conditional_t<SymbolData::classof(T::ClassKind),
T, SymExpr>>
LLVM_ATTRIBUTE_RETURNS_NONNULL const Ret *acquire(Args &&...args);

const SymbolConjured *conjureSymbol(ConstCFGElementRef Elem,
const LocationContext *LCtx, QualType T,
unsigned VisitCount,
const void *SymbolTag = nullptr) {

return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag);
}
const void *SymbolTag = nullptr);

QualType getType(const SymExpr *SE) const {
return SE->getType();
Expand Down Expand Up @@ -673,17 +725,22 @@ class SymbolVisitor {
virtual bool VisitMemRegion(const MemRegion *) { return true; }
};

template <typename T, typename... Args>
const T *SymbolManager::acquire(Args &&...args) {
// Returns a const pointer to T if T is a SymbolData, otherwise SymExpr.
template <typename T, typename... Args, typename Ret>
const Ret *SymbolManager::acquire(Args &&...args) {
llvm::FoldingSetNodeID profile;
T::Profile(profile, args...);
void *InsertPos;
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
if (!SD) {
SD = Alloc.make<T>(std::forward<Args>(args)...);
DataSet.InsertNode(SD, InsertPos);
if (SD->complexity() > MaxCompComplexity) {
return cast<Ret>(acquire<SymbolOverlyComplex>(SD));
}
}
return cast<T>(SD);

return cast<Ret>(SD);
}

} // namespace ento
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ SYMBOL(SymbolCast, SymExpr)

ABSTRACT_SYMBOL(SymbolData, SymExpr)
SYMBOL(SymbolConjured, SymbolData)
SYMBOL(SymbolOverlyComplex, SymbolData)
SYMBOL(SymbolDerived, SymbolData)
SYMBOL(SymbolExtent, SymbolData)
SYMBOL(SymbolMetadata, SymbolData)
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/StaticAnalyzer/Checkers/Taint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,

// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
Sym->complexity() > Opts.MaxTaintedSymbolComplexity) {
return {};
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class TrustNonnullChecker : public Checker<check::PostCall,
SVal Cond,
bool Assumption) const {
const SymbolRef CondS = Cond.getAsSymbol();
if (!CondS || CondS->computeComplexity() > ComplexityThreshold)
if (!CondS || CondS->complexity() > ComplexityThreshold)
return State;

for (SymbolRef Antecedent : CondS->symbols()) {
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ void ExprEngine::VisitBinaryOperator(const BinaryOperator* B,
if (RightV.isUnknown()) {
unsigned Count = currBldrCtx->blockCount();
RightV = svalBuilder.conjureSymbolVal(nullptr, getCFGElementRef(), LCtx,
Count);
RHS->getType(), Count);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly in #137355 @fangyi-zhou changed the behavior of this line, thus needed a tiny bit of adjustment to make the new test pass while I was uplifting this downstream patch to current llvm main.
I didn't investigate the case beyond that this was the line that conjured a symbol of a wrong type after #137355, probably because in the past we directly passed a QualType here but after that change we rely on deducing the type from getCFGElementRef() - which is apparently wrong. To see the behavior, revert this hunk and see the broken test. There could be more places where this type mismatch on conjure could cause issues, but I didn't audit the code further.

}
// Simulate the effects of a "store": bind the value of the RHS
// to the L-Value represented by the LHS.
Expand Down
Loading