llvm
diff --git a/‎clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
Lines changed: 2 additions & 17 deletions b/‎clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
Lines changed: 2 additions & 17 deletions
diff --git a/‎clang/include/clang/CIR/Dialect/IR/CIREnumAttr.td
Lines changed: 38 additions & 0 deletions b/‎clang/include/clang/CIR/Dialect/IR/CIREnumAttr.td
Lines changed: 38 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/RuntimeLibcalls.h
Lines changed: 10 additions & 3 deletions b/‎llvm/include/llvm/IR/RuntimeLibcalls.h
Lines changed: 10 additions & 3 deletions
diff --git a/‎llvm/include/llvm/TableGen/StringToOffsetTable.h
Lines changed: 6 additions & 1 deletion b/‎llvm/include/llvm/TableGen/StringToOffsetTable.h
Lines changed: 6 additions & 1 deletion
diff --git a/‎llvm/lib/IR/RuntimeLibcalls.cpp
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/IR/RuntimeLibcalls.cpp
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/TableGen/StringToOffsetTable.cpp
Lines changed: 5 additions & 5 deletions b/‎llvm/lib/TableGen/StringToOffsetTable.cpp
Lines changed: 5 additions & 5 deletions
diff --git a/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Lines changed: 13 additions & 12 deletions b/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Lines changed: 13 additions & 12 deletions
diff --git a/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Lines changed: 3 additions & 5 deletions b/‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Lines changed: 3 additions & 5 deletions
diff --git a/‎llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
Lines changed: 2 additions & 2 deletions
@@ -14,10 +14,10 @@
 #define CLANG_CIR_DIALECT_IR_CIRATTRS_TD
 
 include "mlir/IR/BuiltinAttributeInterfaces.td"
-include "mlir/IR/EnumAttr.td"
 
-include "clang/CIR/Dialect/IR/CIRDialect.td"
 include "clang/CIR/Dialect/IR/CIRAttrConstraints.td"
+include "clang/CIR/Dialect/IR/CIRDialect.td"
+include "clang/CIR/Dialect/IR/CIREnumAttr.td"
 
 //===----------------------------------------------------------------------===//
 // CIR Attrs
@@ -42,21 +42,6 @@ class CIR_TypedAttr<string name, string attrMnemonic, list<Trait> traits = []>
   let assemblyFormat = [{}];
 }
 
-class CIR_I32EnumAttr<string name, string summary, list<I32EnumAttrCase> cases>
-    : I32EnumAttr<name, summary, cases> {
-  let cppNamespace = "::cir";
-}
-
-class CIR_I64EnumAttr<string name, string summary, list<I64EnumAttrCase> cases>
-    : I64EnumAttr<name, summary, cases> {
-  let cppNamespace = "::cir";
-}
-
-class CIR_EnumAttr<EnumAttrInfo info, string name = "", list<Trait> traits = []>
-    : EnumAttr<CIR_Dialect, info, name, traits> {
-  let assemblyFormat = "`<` $value `>`";
-}
-
 class CIRUnitAttr<string name, string attrMnemonic, list<Trait> traits = []>
     : CIR_Attr<name, attrMnemonic, traits> {
   let returnType = "bool";
 
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CIR dialect enum base classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_CIR_DIALECT_IR_CIRENUMATTR_TD
+#define CLANG_CIR_DIALECT_IR_CIRENUMATTR_TD
+
+include "mlir/IR/EnumAttr.td"
+
+class CIR_I32EnumAttr<string name, string summary, list<I32EnumAttrCase> cases>
+    : I32EnumAttr<name, summary, cases> {
+  let cppNamespace = "::cir";
+}
+
+class CIR_I64EnumAttr<string name, string summary, list<I64EnumAttrCase> cases>
+    : I64EnumAttr<name, summary, cases> {
+  let cppNamespace = "::cir";
+}
+
+class CIR_EnumAttr<EnumAttrInfo info, string name = "", list<Trait> traits = []>
+    : EnumAttr<CIR_Dialect, info, name, traits> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+class CIR_DefaultValuedEnumParameter<EnumAttrInfo info, string value = "">
+    : EnumParameter<info> {
+  let defaultValue = value;
+}
+
+#endif // CLANG_CIR_DIALECT_IR_CIRENUMATTR_TD
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/StringTable.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/AtomicOrdering.h"
@@ -77,12 +78,16 @@ struct RuntimeLibcallsInfo {
   /// Get the libcall routine name for the specified libcall.
   // FIXME: This should be removed. Only LibcallImpl should have a name.
   const char *getLibcallName(RTLIB::Libcall Call) const {
-    return LibCallImplNames[LibcallImpls[Call]];
+    return getLibcallImplName(LibcallImpls[Call]);
   }
 
   /// Get the libcall routine name for the specified libcall implementation.
+  // FIXME: Change to return StringRef
   static const char *getLibcallImplName(RTLIB::LibcallImpl CallImpl) {
-    return LibCallImplNames[CallImpl];
+    if (CallImpl == RTLIB::Unsupported)
+      return nullptr;
+    return RuntimeLibcallImplNameTable[RuntimeLibcallNameOffsetTable[CallImpl]]
+        .data();
   }
 
   /// Return the lowering's selection of implementation call for \p Call
@@ -144,7 +149,9 @@ struct RuntimeLibcallsInfo {
 
   /// Names of concrete implementations of runtime calls. e.g. __ashlsi3 for
   /// SHL_I32
-  LLVM_ABI static const char *const LibCallImplNames[RTLIB::NumLibcallImpls];
+  LLVM_ABI static const char RuntimeLibcallImplNameTableStorage[];
+  LLVM_ABI static const StringTable RuntimeLibcallImplNameTable;
+  LLVM_ABI static const uint16_t RuntimeLibcallNameOffsetTable[];
 
   /// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind.
   LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls];
 
@@ -23,10 +23,15 @@ namespace llvm {
 class StringToOffsetTable {
   StringMap<unsigned> StringOffset;
   std::string AggregateString;
+
+  /// If this is to be a static class member, the prefix to use (i.e. class name
+  /// plus ::)
+  const StringRef ClassPrefix;
   const bool AppendZero;
 
 public:
-  StringToOffsetTable(bool AppendZero = true) : AppendZero(AppendZero) {
+  StringToOffsetTable(bool AppendZero = true, StringRef ClassPrefix = "")
+      : ClassPrefix(ClassPrefix), AppendZero(AppendZero) {
     // Ensure we always put the empty string at offset zero. That lets empty
     // initialization also be zero initialization for offsets into the table.
     GetOrAddStringOffset("");
 
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/RuntimeLibcalls.h"
+#include "llvm/ADT/StringTable.h"
 
 using namespace llvm;
 using namespace RTLIB;
 
@@ -38,8 +38,8 @@ void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Woverlength-strings"
 #endif
-static constexpr char {}Storage[] = )",
-                Name);
+{} constexpr char {}{}Storage[] = )",
+                ClassPrefix.empty() ? "static" : "", ClassPrefix, Name);
 
   // MSVC silently miscompiles string literals longer than 64k in some
   // circumstances. The build system sets EmitLongStrLiterals to false when it
@@ -83,10 +83,10 @@ static constexpr char {}Storage[] = )",
 #pragma GCC diagnostic pop
 #endif
 
-static constexpr llvm::StringTable
-{0} = {0}Storage;
+{1}constexpr llvm::StringTable
+{2}{0} = {0}Storage;
 )",
-                Name);
+                Name, ClassPrefix.empty() ? "static " : "", ClassPrefix);
 }
 
 void StringToOffsetTable::EmitString(raw_ostream &O) const {
 
@@ -3724,7 +3724,7 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
 
 InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
     unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
-    bool HasRealUse, const Instruction *I, Value *Scalar,
+    const Instruction *I, Value *Scalar,
     ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
   assert(Val->isVectorTy() && "This must be a vector type");
 
@@ -3744,12 +3744,10 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
     }
 
     // The element at index zero is already inside the vector.
-    // - For a physical (HasRealUse==true) insert-element or extract-element
+    // - For an insert-element or extract-element
     // instruction that extracts integers, an explicit FPR -> GPR move is
     // needed. So it has non-zero cost.
-    // - For the rest of cases (virtual instruction or element type is float),
-    // consider the instruction free.
-    if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
+    if (Index == 0 && !Val->getScalarType()->isIntegerTy())
       return 0;
 
     // This is recognising a LD1 single-element structure to one lane of one
@@ -3899,25 +3897,28 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                    unsigned Index,
                                                    const Value *Op0,
                                                    const Value *Op1) const {
-  bool HasRealUse =
-      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
-  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, HasRealUse);
+  // Treat an insert at lane 0 into a poison vector as having zero cost. This
+  // ensures vector broadcasts via an insert + shuffle (which will be lowered to
+  // a single dup) are treated as cheap.
+  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
+      isa<PoisonValue>(Op0))
+    return 0;
+  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);
 }
 
 InstructionCost AArch64TTIImpl::getVectorInstrCost(
     unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
     Value *Scalar,
     ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
-  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, false, nullptr,
-                                  Scalar, ScalarUserAndIdx);
+  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,
+                                  ScalarUserAndIdx);
 }
 
 InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
                                                    Type *Val,
                                                    TTI::TargetCostKind CostKind,
                                                    unsigned Index) const {
-  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index,
-                                  true /* HasRealUse */, &I);
+  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);
 }
 
 InstructionCost AArch64TTIImpl::getScalarizationOverhead(
 
@@ -65,16 +65,14 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
 
   // A helper function called by 'getVectorInstrCost'.
   //
-  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
-  // indicates whether the vector instruction is available in the input IR or
-  // just imaginary in vectorizer passes.
-  /// \param ScalarUserAndIdx encodes the information about extracts from a
+  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'.
+  /// \param ScalarUserAndIdx encodes the information about extracts from a
   /// vector with 'Scalar' being the value being extracted,'User' being the user
   /// of the extract(nullptr if user is not known before vectorization) and
   /// 'Idx' being the extract lane.
   InstructionCost getVectorInstrCostHelper(
       unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
-      bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr,
+      const Instruction *I = nullptr, Value *Scalar = nullptr,
       ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;
 
 public:
 
@@ -13,8 +13,8 @@ define void @reduce() {
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
 
@@ -13,8 +13,8 @@ define void @reduce() {
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 5 for: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 9 for: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 7 for: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 15 for: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 17 for: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)