Skip to content

Commit 8b90b3f

Browse files
[analyzer] Enforce not making overly complicated symbols
CPP-6182
1 parent 2c32b9f commit 8b90b3f

File tree

12 files changed

+211
-81
lines changed

12 files changed

+211
-81
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class SValBuilder {
5757
protected:
5858
ASTContext &Context;
5959

60+
const AnalyzerOptions &AnOpts;
61+
6062
/// Manager of APSInt values.
6163
BasicValueFactory BasicVals;
6264

@@ -68,8 +70,6 @@ class SValBuilder {
6870

6971
ProgramStateManager &StateMgr;
7072

71-
const AnalyzerOptions &AnOpts;
72-
7373
/// The scalar type to use for array indices.
7474
const QualType ArrayIndexTy;
7575

@@ -326,8 +326,8 @@ class SValBuilder {
326326
nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op,
327327
const SymExpr *rhs, QualType type);
328328

329-
NonLoc makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
330-
QualType type);
329+
nonloc::SymbolVal makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
330+
QualType type);
331331

332332
/// Create a NonLoc value for cast.
333333
nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy,

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,18 @@ class SymExpr : public llvm::FoldingSetNode {
5151
/// Note, however, that it can't be used in Profile because SymbolManager
5252
/// needs to compute Profile before allocating SymExpr.
5353
const SymbolID Sym;
54+
const unsigned Complexity;
5455

5556
protected:
56-
SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}
57+
SymExpr(Kind k, SymbolID Sym, unsigned Complexity)
58+
: K(k), Sym(Sym), Complexity(Complexity) {}
5759

5860
static bool isValidTypeForSymbol(QualType T) {
5961
// FIXME: Depending on whether we choose to deprecate structural symbols,
6062
// this may become much stricter.
6163
return !T.isNull() && !T->isVoidType();
6264
}
6365

64-
mutable unsigned Complexity = 0;
65-
6666
public:
6767
virtual ~SymExpr() = default;
6868

@@ -108,7 +108,7 @@ class SymExpr : public llvm::FoldingSetNode {
108108
return llvm::make_range(symbol_iterator(this), symbol_iterator());
109109
}
110110

111-
virtual unsigned computeComplexity() const = 0;
111+
unsigned complexity() const { return Complexity; }
112112

113113
/// Find the region from which this symbol originates.
114114
///
@@ -136,21 +136,22 @@ using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
136136
/// A symbol representing data which can be stored in a memory location
137137
/// (region).
138138
class SymbolData : public SymExpr {
139+
friend class SymbolManager;
139140
void anchor() override;
140141

141142
protected:
142-
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }
143+
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym, computeComplexity()) {
144+
assert(classof(this));
145+
}
146+
147+
static unsigned computeComplexity(...) { return 1; }
143148

144149
public:
145150
~SymbolData() override = default;
146151

147152
/// Get a string representation of the kind of the region.
148153
virtual StringRef getKindStr() const = 0;
149154

150-
unsigned computeComplexity() const override {
151-
return 1;
152-
};
153-
154155
// Implement isa<T> support.
155156
static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
156157
static constexpr bool classof(Kind K) {

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h

Lines changed: 92 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
1616

1717
#include "clang/AST/Expr.h"
18+
#include "clang/AST/OperationKinds.h"
1819
#include "clang/AST/Type.h"
1920
#include "clang/Analysis/AnalysisDeclContext.h"
2021
#include "clang/Basic/LLVM.h"
22+
#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
2123
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
2224
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
2325
#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
@@ -26,9 +28,11 @@
2628
#include "llvm/ADT/DenseSet.h"
2729
#include "llvm/ADT/FoldingSet.h"
2830
#include "llvm/ADT/ImmutableSet.h"
31+
#include "llvm/ADT/STLExtras.h"
2932
#include "llvm/ADT/iterator_range.h"
3033
#include "llvm/Support/Allocator.h"
3134
#include <cassert>
35+
#include <type_traits>
3236

3337
namespace clang {
3438

@@ -133,6 +137,47 @@ class SymbolConjured : public SymbolData {
133137
static constexpr bool classof(Kind K) { return K == ClassKind; }
134138
};
135139

140+
/// A symbol representing the result of an expression that became too
141+
/// complicated. In other words, its complexity surpassed the
142+
/// MaxSymbolComplexity threshold.
143+
/// TODO: When the MaxSymbolComplexity is reached, we should propagate the taint
144+
/// info to it.
145+
class SymbolOverlyComplex final : public SymbolData {
146+
const SymExpr *OverlyComplicatedSymbol;
147+
148+
friend class SymExprAllocator;
149+
150+
SymbolOverlyComplex(SymbolID Sym, const SymExpr *OverlyComplicatedSymbol)
151+
: SymbolData(ClassKind, Sym),
152+
OverlyComplicatedSymbol(OverlyComplicatedSymbol) {
153+
assert(OverlyComplicatedSymbol);
154+
}
155+
156+
public:
157+
QualType getType() const override {
158+
return OverlyComplicatedSymbol->getType();
159+
}
160+
161+
StringRef getKindStr() const override;
162+
163+
void dumpToStream(raw_ostream &os) const override;
164+
165+
static void Profile(llvm::FoldingSetNodeID &profile,
166+
const SymExpr *OverlyComplicatedSymbol) {
167+
profile.AddInteger((unsigned)ClassKind);
168+
profile.AddPointer(OverlyComplicatedSymbol);
169+
}
170+
171+
void Profile(llvm::FoldingSetNodeID &profile) override {
172+
Profile(profile, OverlyComplicatedSymbol);
173+
}
174+
175+
// Implement isa<T> support.
176+
static constexpr Kind ClassKind = SymbolOverlyComplexKind;
177+
static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
178+
static constexpr bool classof(Kind K) { return K == ClassKind; }
179+
};
180+
136181
/// A symbol representing the value of a MemRegion whose parent region has
137182
/// symbolic value.
138183
class SymbolDerived : public SymbolData {
@@ -285,6 +330,7 @@ class SymbolMetadata : public SymbolData {
285330

286331
/// Represents a cast expression.
287332
class SymbolCast : public SymExpr {
333+
friend class SymbolManager;
288334
const SymExpr *Operand;
289335

290336
/// Type of the operand.
@@ -295,20 +341,19 @@ class SymbolCast : public SymExpr {
295341

296342
friend class SymExprAllocator;
297343
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
298-
: SymExpr(ClassKind, Sym), Operand(In), FromTy(From), ToTy(To) {
344+
: SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In),
345+
FromTy(From), ToTy(To) {
299346
assert(In);
300347
assert(isValidTypeForSymbol(From));
301348
// FIXME: GenericTaintChecker creates symbols of void type.
302349
// Otherwise, 'To' should also be a valid type.
303350
}
304351

305-
public:
306-
unsigned computeComplexity() const override {
307-
if (Complexity == 0)
308-
Complexity = 1 + Operand->computeComplexity();
309-
return Complexity;
352+
static unsigned computeComplexity(const SymExpr *In, QualType, QualType) {
353+
return In->complexity() + 1;
310354
}
311355

356+
public:
312357
QualType getType() const override { return ToTy; }
313358

314359
LLVM_ATTRIBUTE_RETURNS_NONNULL
@@ -336,14 +381,16 @@ class SymbolCast : public SymExpr {
336381

337382
/// Represents a symbolic expression involving a unary operator.
338383
class UnarySymExpr : public SymExpr {
384+
friend class SymbolManager;
339385
const SymExpr *Operand;
340386
UnaryOperator::Opcode Op;
341387
QualType T;
342388

343389
friend class SymExprAllocator;
344390
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
345391
QualType T)
346-
: SymExpr(ClassKind, Sym), Operand(In), Op(Op), T(T) {
392+
: SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In),
393+
Op(Op), T(T) {
347394
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
348395
// modeled as x + 1.
349396
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -354,13 +401,12 @@ class UnarySymExpr : public SymExpr {
354401
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
355402
}
356403

357-
public:
358-
unsigned computeComplexity() const override {
359-
if (Complexity == 0)
360-
Complexity = 1 + Operand->computeComplexity();
361-
return Complexity;
404+
static unsigned computeComplexity(const SymExpr *In, UnaryOperator::Opcode,
405+
QualType) {
406+
return In->complexity() + 1;
362407
}
363408

409+
public:
364410
const SymExpr *getOperand() const { return Operand; }
365411
UnaryOperator::Opcode getOpcode() const { return Op; }
366412
QualType getType() const override { return T; }
@@ -391,8 +437,9 @@ class BinarySymExpr : public SymExpr {
391437
QualType T;
392438

393439
protected:
394-
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
395-
: SymExpr(k, Sym), Op(op), T(t) {
440+
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t,
441+
unsigned Complexity)
442+
: SymExpr(k, Sym, Complexity), Op(op), T(t) {
396443
assert(classof(this));
397444
// Binary expressions are results of arithmetic. Pointer arithmetic is not
398445
// handled by binary expressions, but it is instead handled by applying
@@ -415,7 +462,7 @@ class BinarySymExpr : public SymExpr {
415462

416463
protected:
417464
static unsigned computeOperandComplexity(const SymExpr *Value) {
418-
return Value->computeComplexity();
465+
return Value->complexity();
419466
}
420467
static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
421468
return 1;
@@ -432,17 +479,26 @@ class BinarySymExpr : public SymExpr {
432479
/// Template implementation for all binary symbolic expressions
433480
template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassK>
434481
class BinarySymExprImpl : public BinarySymExpr {
482+
friend class SymbolManager;
435483
LHSTYPE LHS;
436484
RHSTYPE RHS;
437485

438486
friend class SymExprAllocator;
439487
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
440488
RHSTYPE rhs, QualType t)
441-
: BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
489+
: BinarySymExpr(Sym, ClassKind, op, t,
490+
computeComplexity(lhs, op, rhs, t)),
491+
LHS(lhs), RHS(rhs) {
442492
assert(getPointer(lhs));
443493
assert(getPointer(rhs));
444494
}
445495

496+
static unsigned computeComplexity(LHSTYPE lhs, BinaryOperator::Opcode,
497+
RHSTYPE rhs, QualType) {
498+
// FIXME: Should we add 1 to complexity?
499+
return computeOperandComplexity(lhs) + computeOperandComplexity(rhs);
500+
}
501+
446502
public:
447503
void dumpToStream(raw_ostream &os) const override {
448504
dumpToStreamImpl(os, LHS);
@@ -453,13 +509,6 @@ class BinarySymExprImpl : public BinarySymExpr {
453509
LHSTYPE getLHS() const { return LHS; }
454510
RHSTYPE getRHS() const { return RHS; }
455511

456-
unsigned computeComplexity() const override {
457-
if (Complexity == 0)
458-
Complexity =
459-
computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
460-
return Complexity;
461-
}
462-
463512
static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
464513
BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
465514
ID.AddInteger((unsigned)ClassKind);
@@ -520,27 +569,30 @@ class SymbolManager {
520569
SymExprAllocator Alloc;
521570
BasicValueFactory &BV;
522571
ASTContext &Ctx;
572+
const unsigned MaxCompComplexity;
523573

524574
public:
525575
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
526-
llvm::BumpPtrAllocator &bpalloc)
527-
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}
576+
llvm::BumpPtrAllocator &bpalloc, const AnalyzerOptions &Opts)
577+
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx),
578+
MaxCompComplexity(Opts.MaxSymbolComplexity) {
579+
assert(MaxCompComplexity > 0 && "Zero max complexity doesn't make sense");
580+
}
528581

529582
static bool canSymbolicate(QualType T);
530583

531-
/// Create or retrieve a SymExpr of type \p SymExprT for the given arguments.
584+
/// Create or retrieve a SymExpr of type \p T for the given arguments.
532585
/// Use the arguments to check for an existing SymExpr and return it,
533586
/// otherwise, create a new one and keep a pointer to it to avoid duplicates.
534-
template <typename SymExprT, typename... Args>
535-
const SymExprT *acquire(Args &&...args);
587+
template <typename T, typename... Args,
588+
typename Ret = std::conditional_t<SymbolData::classof(T::ClassKind),
589+
T, SymExpr>>
590+
LLVM_ATTRIBUTE_RETURNS_NONNULL const Ret *acquire(Args &&...args);
536591

537592
const SymbolConjured *conjureSymbol(ConstCFGElementRef Elem,
538593
const LocationContext *LCtx, QualType T,
539594
unsigned VisitCount,
540-
const void *SymbolTag = nullptr) {
541-
542-
return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag);
543-
}
595+
const void *SymbolTag = nullptr);
544596

545597
QualType getType(const SymExpr *SE) const {
546598
return SE->getType();
@@ -673,17 +725,22 @@ class SymbolVisitor {
673725
virtual bool VisitMemRegion(const MemRegion *) { return true; }
674726
};
675727

676-
template <typename T, typename... Args>
677-
const T *SymbolManager::acquire(Args &&...args) {
728+
// Returns a const pointer to T if T is a SymbolData, otherwise SymExpr.
729+
template <typename T, typename... Args, typename Ret>
730+
const Ret *SymbolManager::acquire(Args &&...args) {
678731
llvm::FoldingSetNodeID profile;
679732
T::Profile(profile, args...);
680733
void *InsertPos;
681734
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
682735
if (!SD) {
683736
SD = Alloc.make<T>(std::forward<Args>(args)...);
684737
DataSet.InsertNode(SD, InsertPos);
738+
if (SD->complexity() > MaxCompComplexity) {
739+
return cast<Ret>(acquire<SymbolOverlyComplex>(SD));
740+
}
685741
}
686-
return cast<T>(SD);
742+
743+
return cast<Ret>(SD);
687744
}
688745

689746
} // namespace ento

clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ SYMBOL(SymbolCast, SymExpr)
4545

4646
ABSTRACT_SYMBOL(SymbolData, SymExpr)
4747
SYMBOL(SymbolConjured, SymbolData)
48+
SYMBOL(SymbolOverlyComplex, SymbolData)
4849
SYMBOL(SymbolDerived, SymbolData)
4950
SYMBOL(SymbolExtent, SymbolData)
5051
SYMBOL(SymbolMetadata, SymbolData)

clang/lib/StaticAnalyzer/Checkers/Taint.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
267267

268268
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
269269
if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
270-
Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
270+
Sym->complexity() > Opts.MaxTaintedSymbolComplexity) {
271271
return {};
272272
}
273273

clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class TrustNonnullChecker : public Checker<check::PostCall,
6666
SVal Cond,
6767
bool Assumption) const {
6868
const SymbolRef CondS = Cond.getAsSymbol();
69-
if (!CondS || CondS->computeComplexity() > ComplexityThreshold)
69+
if (!CondS || CondS->complexity() > ComplexityThreshold)
7070
return State;
7171

7272
for (SymbolRef Antecedent : CondS->symbols()) {

clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ void ExprEngine::VisitBinaryOperator(const BinaryOperator* B,
6767
if (RightV.isUnknown()) {
6868
unsigned Count = currBldrCtx->blockCount();
6969
RightV = svalBuilder.conjureSymbolVal(nullptr, getCFGElementRef(), LCtx,
70-
Count);
70+
RHS->getType(), Count);
7171
}
7272
// Simulate the effects of a "store": bind the value of the RHS
7373
// to the L-Value represented by the LHS.

0 commit comments

Comments
 (0)