Skip to content

[Clang][objectsize] Generate object size calculation for sub-objects #86858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 188 additions & 6 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
Expand Down Expand Up @@ -1041,6 +1042,184 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
}

namespace {

/// BaseObjectVisitor - Take an expression and return its underlying object,
/// removing casts and unary ops (addrof or deref) that don't affect the
/// object's underlying type.
///
///   Object                                  Underlying Object
///   ------                                  -----------------
///   p[x]                                    p
///   &p[x]                                   p[x]
///   &(*(int *)&((char *)&p[22][1])[x])      ((char *)&p[22][1])[x]
///
/// Every Visit* method returns the underlying object, or nullptr when the
/// expression has a shape this visitor does not handle.
class BaseObjectVisitor
    : public ConstStmtVisitor<BaseObjectVisitor, const Expr *> {
  // Needed by IgnoreParenNoopCasts() to decide which casts are no-ops.
  ASTContext &Ctx;

public:
  explicit BaseObjectVisitor(ASTContext &Ctx) : Ctx(Ctx) {}

  /// Entry point taking an Expr; forwards to the generic visitor dispatch.
  const Expr *Visit(const Expr *E) {
    return ConstStmtVisitor<BaseObjectVisitor, const Expr *>::Visit(E);
  }

  /// Fallback for node kinds without a dedicated handler below: no
  /// underlying object can be determined.
  const Expr *VisitStmt(const Stmt *) { return nullptr; }

  // A declaration or member reference is its own underlying object.
  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }

  /// The underlying object of 'base[idx]' is 'base' (parentheses removed).
  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
    return E->getBase()->IgnoreParens();
  }

  /// Look through parentheses and no-op casts. If nothing could be stripped,
  /// the cast is not a no-op and we give up.
  const Expr *VisitCastExpr(const CastExpr *E) {
    const Expr *NoopE = E->IgnoreParenNoopCasts(Ctx);
    return NoopE == E ? nullptr : Visit(NoopE);
  }

  /// '&X': if X is directly a member, declaration reference or array
  /// subscript, X itself is the underlying object; otherwise keep looking
  /// inside X.
  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
    const Expr *SubExpr = E->getSubExpr()->IgnoreParens();
    if (isa<MemberExpr>(SubExpr) || isa<DeclRefExpr>(SubExpr) ||
        isa<ArraySubscriptExpr>(SubExpr))
      return SubExpr;
    return Visit(SubExpr);
  }

  /// '*X': the dereference is skipped and the search continues in X.
  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
    return Visit(E->getSubExpr()->IgnoreParens());
  }
};

/// ArrayBaseVisitor - Get the base MemberExpr of the expression. This is used
/// to test the array base to see if it's a flexible array member. We skip
/// past all casts, '*'s, and '&'s, because they aren't important for that
/// analysis.
///
/// Every Visit* method returns the base expression that was found, or nullptr
/// when the expression has a shape this visitor does not handle.
class ArrayBaseVisitor
    : public ConstStmtVisitor<ArrayBaseVisitor, const Expr *> {
public:
  /// Entry point taking an Expr; forwards to the generic visitor dispatch.
  const Expr *Visit(const Expr *E) {
    return ConstStmtVisitor<ArrayBaseVisitor, const Expr *>::Visit(E);
  }

  /// Fallback for node kinds without a dedicated handler below: no base
  /// could be determined.
  const Expr *VisitStmt(const Stmt *) { return nullptr; }

  // These expressions terminate the search and are returned unchanged.
  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { return E; }

  /// Skip any cast. Parentheses around the operand are stripped as well, like
  /// in the other handlers, so that a cast of a parenthesized expression does
  /// not end the search on a ParenExpr.
  const Expr *VisitCastExpr(const CastExpr *E) {
    return Visit(E->getSubExpr()->IgnoreParens());
  }

  /// '&X': if X is directly a member, declaration reference or array
  /// subscript, return X; otherwise keep looking inside X.
  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
    const Expr *SubExpr = E->getSubExpr()->IgnoreParens();
    if (isa<MemberExpr>(SubExpr) || isa<DeclRefExpr>(SubExpr) ||
        isa<ArraySubscriptExpr>(SubExpr))
      return SubExpr;
    return Visit(SubExpr);
  }

  /// '*X': the dereference is skipped and the search continues in X.
  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
    return Visit(E->getSubExpr()->IgnoreParens());
  }
};

/// ArrayIndexVisitor - Get the index expression of the top-level array.
///
/// Casts, '&' and '*' are looked through; the first array subscript reached
/// supplies the index. Any other expression kind yields nullptr.
class ArrayIndexVisitor
    : public ConstStmtVisitor<ArrayIndexVisitor, const Expr *> {
  using VisitorBase = ConstStmtVisitor<ArrayIndexVisitor, const Expr *>;

  /// Continue the search in \p Operand once its parentheses are removed.
  const Expr *visitOperand(const Expr *Operand) {
    return Visit(Operand->IgnoreParens());
  }

public:
  /// Entry point taking an Expr; forwards to the generic visitor dispatch.
  const Expr *Visit(const Expr *E) { return VisitorBase::Visit(E); }

  /// Fallback for node kinds without a dedicated handler: no index found.
  const Expr *VisitStmt(const Stmt *) { return nullptr; }

  // Wrappers around the subscript are transparent for this search.
  const Expr *VisitCastExpr(const CastExpr *E) {
    return visitOperand(E->getSubExpr());
  }
  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
    return visitOperand(E->getSubExpr());
  }
  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
    return visitOperand(E->getSubExpr());
  }

  /// The subscript itself: hand back its index with parentheses removed.
  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
    const Expr *Index = E->getIdx();
    return Index->IgnoreParens();
  }
};

} // end anonymous namespace

/// tryToCalculateSubObjectSize - It may be possible to calculate the
/// sub-object size of an array and skip the generation of the llvm.objectsize
/// intrinsic. This avoids the complication in conveying the sub-object's
/// information to the backend.
llvm::Value *
CodeGenFunction::tryToCalculateSubObjectSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType) {
if ((Type & 0x01) != 1)
// Only support sub-object calculation.
return nullptr;

E = E->IgnoreParens();

// BaseObj is the object we want the size of.
ASTContext &Ctx = getContext();
const Expr *BaseObj = BaseObjectVisitor(Ctx).Visit(E);
if (!BaseObj)
return nullptr;

// Find the array underlying the base object; the base object is expected to
// be an array or casts surrounding an array.
const Expr *ArrayBase = (isa<ArraySubscriptExpr>(BaseObj)
? cast<ArraySubscriptExpr>(BaseObj)->getBase()
: BaseObj)
->IgnoreParens();

ArrayBase = ArrayBaseVisitor().Visit(ArrayBase);
if (!ArrayBase)
return nullptr;

// Check to see if the underlying object's base is a flexible array member.
// Processing of the 'counted_by' attribute is done by now, so return -1 (all
// bits set) because we don't have any information on the size.
LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
getLangOpts().getStrictFlexArraysLevel();
if (ArrayBase->isFlexibleArrayMemberLike(
Ctx, StrictFlexArraysLevel,
/*IgnoreTemplateOrMacroSubstitution=*/true))
return ConstantInt::get(ResType, -1, /*isSigned=*/true);

// Collect the index from the original array expression. The array index must
// not have side effects because we emit it.
const Expr *ArrayIdx = ArrayIndexVisitor().Visit(E);
if (!ArrayIdx || ArrayIdx->HasSideEffects(Ctx))
return nullptr;

// Generate the calculation:
//
// Ty Object[n_1][n_2]...[n_m]; /* M-dimensional array */
//
// BaseObj = Object[n_1]...[n_{x-1}];
//
// BaseObjTySize = sizeof( typeof( BaseObj ) );
// ArrayBaseTySize = sizeof( typeof( ArrayBase ) );
//
// Size = ArrayBaseTySize - (BaseObjTySize * ArrayIdx);
// return Size > 0 ? Size : 0;
//
Value *Res = EmitScalarExpr(ArrayIdx);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check whether ArrayIdx has side-effects? (If EmittedE is non-null, we don't check for side-effects otherwise.)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do that on line 1156.

Res = Builder.CreateIntCast(Res, ResType,
ArrayIdx->getType()->isSignedIntegerType());

Value *BaseObjTySize = ConstantInt::get(
ResType, Ctx.getTypeSizeInChars(BaseObj->getType()).getQuantity(),
/*isSigned=*/true);
Value *ArrayBaseTySize = ConstantInt::get(
ResType, Ctx.getTypeSizeInChars(ArrayBase->getType()).getQuantity(),
/*isSigned=*/true);

Res = Builder.CreateMul(BaseObjTySize, Res);
Res = Builder.CreateSub(ArrayBaseTySize, Res);
return Builder.CreateSelect(Builder.CreateIsNotNeg(Res), Res,
ConstantInt::get(ResType, 0, /*isSigned=*/true));
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
Expand Down Expand Up @@ -1073,18 +1252,21 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
}
}

// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
return getDefaultBuiltinObjectSizeResult(Type, ResType);

if (IsDynamic) {
// Emit special code for a flexible array member with the "counted_by"
// attribute.
if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
return V;
}

// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
return getDefaultBuiltinObjectSizeResult(Type, ResType);
if (Value *V = tryToCalculateSubObjectSize(E, Type, ResType))
return V;
}

Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
assert(Ptr->getType()->isPointerTy() &&
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -5197,6 +5197,12 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmittedE,
bool IsDynamic);

/// Try to calculate the sub-object size (i.e. \p Type's least significant
/// bit is set). It avoids the complication in conveying the sub-object
/// information to the backend.
llvm::Value *tryToCalculateSubObjectSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType);

/// Emits the size of E, as required by __builtin_object_size. This
/// function is aware of pass_object_size parameters, and will act accordingly
/// if E is a parameter with the pass_object_size attribute.
Expand Down
54 changes: 27 additions & 27 deletions clang/test/CodeGen/attr-counted-by.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,33 +405,33 @@ size_t test3_bdos(struct annotated *p) {
// SANITIZE-WITHOUT-ATTR-NEXT: entry:
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ADD:%.*]] = add nsw i32 [[INDEX]], 1
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM17:%.*]] = sext i32 [[ADD]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM17]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX18]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ADD31:%.*]] = add nsw i32 [[INDEX]], 2
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM32:%.*]] = sext i32 [[ADD31]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM32]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX33]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM6:%.*]] = sext i32 [[ADD]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM6]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX7]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: [[ADD13:%.*]] = add nsw i32 [[INDEX]], 2
// SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM14:%.*]] = sext i32 [[ADD13]] to i64
// SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM14]]
// SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX15]], align 4, !tbaa [[TBAA2]]
// SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr nocapture noundef writeonly [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX3]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ADD:%.*]] = add nsw i32 [[INDEX]], 1
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM9:%.*]] = sext i32 [[ADD]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM9]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ADD17:%.*]] = add nsw i32 [[INDEX]], 2
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM18]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM2:%.*]] = sext i32 [[ADD]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX3]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ADD5:%.*]] = add nsw i32 [[INDEX]], 2
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM6:%.*]] = sext i32 [[ADD5]] to i64
// NO-SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM6]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: store i32 255, ptr [[ARRAYIDX7]], align 4, !tbaa [[TBAA2]]
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret void
//
void test4(struct annotated *p, int index, int fam_idx) {
Expand Down Expand Up @@ -471,13 +471,13 @@ void test4(struct annotated *p, int index, int fam_idx) {
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP3]], i64 0
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP7]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test4_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
// SANITIZE-WITHOUT-ATTR-NEXT: entry:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test4_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test4_bdos(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
Expand Down Expand Up @@ -1359,13 +1359,13 @@ int test15(int idx) {
// NO-SANITIZE-WITH-ATTR-NEXT: entry:
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test19(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test19(
// SANITIZE-WITHOUT-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
// SANITIZE-WITHOUT-ATTR-NEXT: entry:
// SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test19(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i64 @test19(
// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry:
// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1
//
Expand Down
Loading
Loading